MON-224 Monitors imported from the Actualys ones using the exported json
This commit is contained in:
parent
2e5ac912c1
commit
54a996e8b7
@ -45,19 +45,19 @@ variable "cpu_utilization_message" {
|
|||||||
variable "cpu_utilization_timeframe" {
|
variable "cpu_utilization_timeframe" {
|
||||||
description = "Timeframe for the CPU Utilization monitor"
|
description = "Timeframe for the CPU Utilization monitor"
|
||||||
type = "string"
|
type = "string"
|
||||||
default = "last_30m"
|
default = "last_15m"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "cpu_utilization_threshold_warning" {
|
variable "cpu_utilization_threshold_warning" {
|
||||||
description = "CPU Utilization in fraction (warning threshold)"
|
description = "CPU Utilization in percentage (warning threshold)"
|
||||||
type = "string"
|
type = "string"
|
||||||
default = 0.8
|
default = 80
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "cpu_utilization_threshold_critical" {
|
variable "cpu_utilization_threshold_critical" {
|
||||||
description = "CPU Utilization in fraction (critical threshold)"
|
description = "CPU Utilization in percentage (critical threshold)"
|
||||||
type = "string"
|
type = "string"
|
||||||
default = 0.9
|
default = 90
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "cpu_utilization_silenced" {
|
variable "cpu_utilization_silenced" {
|
||||||
@ -73,7 +73,7 @@ variable "cpu_utilization_extra_tags" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#
|
#
|
||||||
# DISK
|
# DISK Utilization
|
||||||
#
|
#
|
||||||
variable "disk_utilization_message" {
|
variable "disk_utilization_message" {
|
||||||
description = "Custom message for the Disk Utilization monitor"
|
description = "Custom message for the Disk Utilization monitor"
|
||||||
@ -88,15 +88,15 @@ variable "disk_utilization_timeframe" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
variable "disk_utilization_threshold_warning" {
|
variable "disk_utilization_threshold_warning" {
|
||||||
description = "Disk Utilization in fraction (warning threshold)"
|
description = "Disk Utilization in percentage (warning threshold)"
|
||||||
type = "string"
|
type = "string"
|
||||||
default = 0.8
|
default = 80
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "disk_utilization_threshold_critical" {
|
variable "disk_utilization_threshold_critical" {
|
||||||
description = "Disk Utilization in fraction (critical threshold)"
|
description = "Disk Utilization in percentage (critical threshold)"
|
||||||
type = "string"
|
type = "string"
|
||||||
default = 0.9
|
default = 90
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "disk_utilization_silenced" {
|
variable "disk_utilization_silenced" {
|
||||||
@ -111,6 +111,45 @@ variable "disk_utilization_extra_tags" {
|
|||||||
default = []
|
default = []
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# DISK Utilization Forecast
|
||||||
|
#
|
||||||
|
variable "disk_utilization_forecast_message" {
|
||||||
|
description = "Custom message for the Disk Utilization Forecast monitor"
|
||||||
|
type = "string"
|
||||||
|
default = ""
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "disk_utilization_forecast_timeframe" {
|
||||||
|
description = "Timeframe for the Disk Utilization Forecast monitor"
|
||||||
|
type = "string"
|
||||||
|
default = "next_1w"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "disk_utilization_forecast_threshold_critical" {
|
||||||
|
description = "Disk Utilization Forecast in percentage (critical threshold)"
|
||||||
|
type = "string"
|
||||||
|
default = 80
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "disk_utilization_forecast_threshold_critical_recovery" {
|
||||||
|
description = "Disk Utilization in percentage (recovery threshold)"
|
||||||
|
type = "string"
|
||||||
|
default = 72
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "disk_utilization_forecast_silenced" {
|
||||||
|
description = "Groups to mute for GCP Cloud SQL Disk Utilization Forecast monitor"
|
||||||
|
type = "map"
|
||||||
|
default = {}
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "disk_utilization_forecast_extra_tags" {
|
||||||
|
description = "Extra tags for GCP Cloud SQL Disk Utilization Forecast monitor"
|
||||||
|
type = "list"
|
||||||
|
default = []
|
||||||
|
}
|
||||||
|
|
||||||
#
|
#
|
||||||
# Memory Utilization
|
# Memory Utilization
|
||||||
#
|
#
|
||||||
@ -125,13 +164,13 @@ variable "memory_utilization_timeframe" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
variable "memory_utilization_threshold_warning" {
|
variable "memory_utilization_threshold_warning" {
|
||||||
description = "Memory Utilization in fraction (warning threshold)"
|
description = "Memory Utilization in percentage (warning threshold)"
|
||||||
default = 0.8
|
default = 80
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "memory_utilization_threshold_critical" {
|
variable "memory_utilization_threshold_critical" {
|
||||||
description = "Memory Utilization in fraction (critical threshold)"
|
description = "Memory Utilization in percentage (critical threshold)"
|
||||||
default = 0.9
|
default = 90
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "memory_utilization_silenced" {
|
variable "memory_utilization_silenced" {
|
||||||
@ -169,14 +208,14 @@ variable "memory_utilization_forecast_history" {
|
|||||||
default = "12h"
|
default = "12h"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "memory_utilization_forecast_threshold_warning" {
|
variable "memory_utilization_forecast_threshold_critical" {
|
||||||
description = "Memory Utilization Forecast in fraction (warning threshold)"
|
description = "Memory Utilization Forecast in percentage (critical threshold)"
|
||||||
default = 0.8
|
default = 90
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "memory_utilization_forecast_threshold_critical" {
|
variable "memory_utilization_forecast_threshold_critical_recovery" {
|
||||||
description = "Memory Utilization Forecast in fraction (critical threshold)"
|
description = "Memory Utilization Forecast in percentage (recovery threshold)"
|
||||||
default = 0.9
|
default = 81
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "memory_utilization_forecast_silenced" {
|
variable "memory_utilization_forecast_silenced" {
|
||||||
|
|||||||
@ -15,7 +15,7 @@ data "template_file" "filter" {
|
|||||||
# CPU Utilization
|
# CPU Utilization
|
||||||
#
|
#
|
||||||
resource "datadog_monitor" "cpu_utilization" {
|
resource "datadog_monitor" "cpu_utilization" {
|
||||||
name = "[${var.environment}] Cloud SQL CPU Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
name = "[${var.environment}] Cloud SQL CPU utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||||
message = "${coalesce(var.cpu_utilization_message, var.message)}"
|
message = "${coalesce(var.cpu_utilization_message, var.message)}"
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -23,7 +23,7 @@ resource "datadog_monitor" "cpu_utilization" {
|
|||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(${var.cpu_utilization_timeframe}):
|
avg(${var.cpu_utilization_timeframe}):
|
||||||
avg:gcp.cloudsql.database.cpu.utilization{${data.template_file.filter.rendered}}
|
avg:gcp.cloudsql.database.cpu.utilization{${data.template_file.filter.rendered}}
|
||||||
by {database_id}
|
by {database_id} * 100
|
||||||
> ${var.cpu_utilization_threshold_critical}
|
> ${var.cpu_utilization_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
@ -32,15 +32,18 @@ EOF
|
|||||||
critical = "${var.cpu_utilization_threshold_critical}"
|
critical = "${var.cpu_utilization_threshold_critical}"
|
||||||
}
|
}
|
||||||
|
|
||||||
notify_no_data = true
|
|
||||||
require_full_window = false
|
|
||||||
renotify_interval = 0
|
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
|
locked = false
|
||||||
timeout_h = 0
|
timeout_h = 0
|
||||||
include_tags = true
|
include_tags = true
|
||||||
locked = false
|
no_data_timeframe = 30
|
||||||
|
require_full_window = false
|
||||||
|
notify_no_data = true
|
||||||
|
renotify_interval = 0
|
||||||
|
|
||||||
evaluation_delay = "${var.delay}"
|
evaluation_delay = "${var.delay}"
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
|
|
||||||
silenced = "${var.cpu_utilization_silenced}"
|
silenced = "${var.cpu_utilization_silenced}"
|
||||||
|
|
||||||
tags = [
|
tags = [
|
||||||
@ -56,7 +59,7 @@ EOF
|
|||||||
# Disk Utilization
|
# Disk Utilization
|
||||||
#
|
#
|
||||||
resource "datadog_monitor" "disk_utilization" {
|
resource "datadog_monitor" "disk_utilization" {
|
||||||
name = "[${var.environment}] Cloud SQL Disk Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
name = "[${var.environment}] Cloud SQL Disk utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||||
message = "${coalesce(var.disk_utilization_message, var.message)}"
|
message = "${coalesce(var.disk_utilization_message, var.message)}"
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -64,7 +67,7 @@ resource "datadog_monitor" "disk_utilization" {
|
|||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(${var.disk_utilization_timeframe}):
|
avg(${var.disk_utilization_timeframe}):
|
||||||
avg:gcp.cloudsql.database.disk.utilization{${data.template_file.filter.rendered}}
|
avg:gcp.cloudsql.database.disk.utilization{${data.template_file.filter.rendered}}
|
||||||
by {database_id}
|
by {database_id} *100
|
||||||
> ${var.disk_utilization_threshold_critical}
|
> ${var.disk_utilization_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
@ -73,15 +76,18 @@ EOF
|
|||||||
critical = "${var.disk_utilization_threshold_critical}"
|
critical = "${var.disk_utilization_threshold_critical}"
|
||||||
}
|
}
|
||||||
|
|
||||||
notify_no_data = true
|
|
||||||
require_full_window = false
|
|
||||||
renotify_interval = 0
|
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
|
locked = false
|
||||||
timeout_h = 0
|
timeout_h = 0
|
||||||
include_tags = true
|
include_tags = true
|
||||||
locked = false
|
no_data_timeframe = 20
|
||||||
|
require_full_window = false
|
||||||
|
notify_no_data = true
|
||||||
|
renotify_interval = 0
|
||||||
|
|
||||||
evaluation_delay = "${var.delay}"
|
evaluation_delay = "${var.delay}"
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
|
|
||||||
silenced = "${var.disk_utilization_silenced}"
|
silenced = "${var.disk_utilization_silenced}"
|
||||||
|
|
||||||
tags = [
|
tags = [
|
||||||
@ -93,6 +99,55 @@ EOF
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Disk Utilization Forecast
|
||||||
|
#
|
||||||
|
resource "datadog_monitor" "disk_utilization_forecast" {
|
||||||
|
name = "[${var.environment}] Cloud SQL Disk utilization could reach {{#is_alert}}{{threshold}}%{{/is_alert}} in a near future"
|
||||||
|
message = "${coalesce(var.disk_utilization_forecast_message, var.message)}"
|
||||||
|
|
||||||
|
type = "metric alert"
|
||||||
|
|
||||||
|
query = <<EOF
|
||||||
|
max(${var.disk_utilization_forecast_timeframe}):
|
||||||
|
forecast(
|
||||||
|
avg:gcp.cloudsql.database.disk.utilization{${data.template_file.filter.rendered}} by {database_id} * 100,
|
||||||
|
'linear',
|
||||||
|
1,
|
||||||
|
interval='60m',
|
||||||
|
history='3d',
|
||||||
|
model='default'
|
||||||
|
)
|
||||||
|
>= ${var.disk_utilization_forecast_threshold_critical}
|
||||||
|
EOF
|
||||||
|
|
||||||
|
thresholds {
|
||||||
|
critical = "${var.disk_utilization_forecast_threshold_critical}"
|
||||||
|
critical_recovery = "${var.disk_utilization_forecast_threshold_critical_recovery}"
|
||||||
|
}
|
||||||
|
|
||||||
|
notify_audit = false
|
||||||
|
locked = false
|
||||||
|
timeout_h = 0
|
||||||
|
include_tags = true
|
||||||
|
require_full_window = false
|
||||||
|
notify_no_data = false
|
||||||
|
renotify_interval = 0
|
||||||
|
|
||||||
|
evaluation_delay = "${var.delay}"
|
||||||
|
new_host_delay = "${var.delay}"
|
||||||
|
|
||||||
|
silenced = "${var.disk_utilization_forecast_silenced}"
|
||||||
|
|
||||||
|
tags = [
|
||||||
|
"team:gcp",
|
||||||
|
"provider:gcp",
|
||||||
|
"env:${var.environment}",
|
||||||
|
"resource:cloud-sql",
|
||||||
|
"${var.disk_utilization_forecast_extra_tags}",
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
#
|
#
|
||||||
# Memory Utilization
|
# Memory Utilization
|
||||||
#
|
#
|
||||||
@ -105,7 +160,7 @@ resource "datadog_monitor" "memory_utilization" {
|
|||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(${var.memory_utilization_timeframe}):
|
avg(${var.memory_utilization_timeframe}):
|
||||||
avg:gcp.cloudsql.database.memory.utilization{${data.template_file.filter.rendered}}
|
avg:gcp.cloudsql.database.memory.utilization{${data.template_file.filter.rendered}}
|
||||||
by {database_id}
|
by {database_id} * 100
|
||||||
> ${var.memory_utilization_threshold_critical}
|
> ${var.memory_utilization_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
@ -114,15 +169,18 @@ EOF
|
|||||||
critical = "${var.memory_utilization_threshold_critical}"
|
critical = "${var.memory_utilization_threshold_critical}"
|
||||||
}
|
}
|
||||||
|
|
||||||
notify_no_data = true
|
|
||||||
require_full_window = false
|
|
||||||
renotify_interval = 0
|
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
|
locked = false
|
||||||
timeout_h = 0
|
timeout_h = 0
|
||||||
include_tags = true
|
include_tags = true
|
||||||
locked = false
|
no_data_timeframe = 20
|
||||||
|
require_full_window = false
|
||||||
|
notify_no_data = true
|
||||||
|
renotify_interval = 0
|
||||||
|
|
||||||
evaluation_delay = "${var.delay}"
|
evaluation_delay = "${var.delay}"
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
|
|
||||||
silenced = "${var.memory_utilization_silenced}"
|
silenced = "${var.memory_utilization_silenced}"
|
||||||
|
|
||||||
tags = [
|
tags = [
|
||||||
@ -138,7 +196,7 @@ EOF
|
|||||||
# Memory Utilization Forecast
|
# Memory Utilization Forecast
|
||||||
#
|
#
|
||||||
resource "datadog_monitor" "memory_utilization_forecast" {
|
resource "datadog_monitor" "memory_utilization_forecast" {
|
||||||
name = "[${var.environment}] Cloud SQL Memory Utilization Forecast {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
name = "[${var.environment}] Cloud SQL Memory Utilization could reach {{#is_alert}}{{threshold}}%{{/is_alert}} in a near future"
|
||||||
message = "${coalesce(var.memory_utilization_forecast_message, var.message)}"
|
message = "${coalesce(var.memory_utilization_forecast_message, var.message)}"
|
||||||
|
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -146,30 +204,32 @@ resource "datadog_monitor" "memory_utilization_forecast" {
|
|||||||
query = <<EOF
|
query = <<EOF
|
||||||
max(${var.memory_utilization_forecast_timeframe}):
|
max(${var.memory_utilization_forecast_timeframe}):
|
||||||
forecast(
|
forecast(
|
||||||
avg:gcp.cloudsql.database.memory.utilization{${data.template_file.filter.rendered}} by {database_id},
|
avg:gcp.cloudsql.database.memory.utilization{${data.template_file.filter.rendered}} by {database_id} * 100,
|
||||||
'linear',
|
'linear',
|
||||||
1,
|
1,
|
||||||
interval='${var.memory_utilization_forecast_interval}',
|
interval='${var.memory_utilization_forecast_interval}',
|
||||||
history='${var.memory_utilization_forecast_history}',
|
history='${var.memory_utilization_forecast_history}',
|
||||||
model='default'
|
model='default'
|
||||||
)
|
)
|
||||||
> ${var.memory_utilization_forecast_threshold_critical}
|
>= ${var.memory_utilization_forecast_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
thresholds {
|
thresholds {
|
||||||
warning = "${var.memory_utilization_forecast_threshold_warning}"
|
|
||||||
critical = "${var.memory_utilization_forecast_threshold_critical}"
|
critical = "${var.memory_utilization_forecast_threshold_critical}"
|
||||||
|
critical_recovery = "${var.memory_utilization_forecast_threshold_critical_recovery}"
|
||||||
}
|
}
|
||||||
|
|
||||||
notify_no_data = true
|
|
||||||
require_full_window = false
|
|
||||||
renotify_interval = 0
|
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
|
locked = false
|
||||||
timeout_h = 0
|
timeout_h = 0
|
||||||
include_tags = true
|
include_tags = true
|
||||||
locked = false
|
require_full_window = false
|
||||||
|
notify_no_data = false
|
||||||
|
renotify_interval = 0
|
||||||
|
|
||||||
evaluation_delay = "${var.delay}"
|
evaluation_delay = "${var.delay}"
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
|
|
||||||
silenced = "${var.memory_utilization_forecast_silenced}"
|
silenced = "${var.memory_utilization_forecast_silenced}"
|
||||||
|
|
||||||
tags = [
|
tags = [
|
||||||
@ -185,7 +245,7 @@ EOF
|
|||||||
# Failover Unavailable
|
# Failover Unavailable
|
||||||
#
|
#
|
||||||
resource "datadog_monitor" "failover_unavailable" {
|
resource "datadog_monitor" "failover_unavailable" {
|
||||||
name = "[${var.environment}] Cloud SQL Failover Unavailable {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
name = "[${var.environment}] Cloud SQL Failover Unavailable"
|
||||||
message = "${coalesce(var.failover_unavailable_message, var.message)}"
|
message = "${coalesce(var.failover_unavailable_message, var.message)}"
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -201,15 +261,18 @@ EOF
|
|||||||
critical = "${var.failover_unavailable_threshold_critical}"
|
critical = "${var.failover_unavailable_threshold_critical}"
|
||||||
}
|
}
|
||||||
|
|
||||||
notify_no_data = true
|
|
||||||
require_full_window = false
|
|
||||||
renotify_interval = 0
|
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
|
locked = false
|
||||||
timeout_h = 0
|
timeout_h = 0
|
||||||
include_tags = true
|
include_tags = true
|
||||||
locked = false
|
no_data_timeframe = 20
|
||||||
|
require_full_window = false
|
||||||
|
notify_no_data = true
|
||||||
|
renotify_interval = 0
|
||||||
|
|
||||||
evaluation_delay = "${var.delay}"
|
evaluation_delay = "${var.delay}"
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
|
|
||||||
silenced = "${var.failover_unavailable_silenced}"
|
silenced = "${var.failover_unavailable_silenced}"
|
||||||
|
|
||||||
tags = [
|
tags = [
|
||||||
|
|||||||
@ -96,13 +96,13 @@ variable "replication_lag_timeframe" {
|
|||||||
variable "replication_lag_threshold_warning" {
|
variable "replication_lag_threshold_warning" {
|
||||||
description = "Seconds behind the master (warning threshold)"
|
description = "Seconds behind the master (warning threshold)"
|
||||||
type = "string"
|
type = "string"
|
||||||
default = 300
|
default = 90
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "replication_lag_threshold_critical" {
|
variable "replication_lag_threshold_critical" {
|
||||||
description = "Seconds behind the master (critical threshold)"
|
description = "Seconds behind the master (critical threshold)"
|
||||||
type = "string"
|
type = "string"
|
||||||
default = 900
|
default = 180
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "replication_lag_silenced" {
|
variable "replication_lag_silenced" {
|
||||||
@ -135,13 +135,13 @@ variable "queries_changing_message" {
|
|||||||
variable "queries_changing_timeframe" {
|
variable "queries_changing_timeframe" {
|
||||||
description = "Timeframe for the Queries Changing monitor"
|
description = "Timeframe for the Queries Changing monitor"
|
||||||
type = "string"
|
type = "string"
|
||||||
default = "last_10m"
|
default = "last_1h"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "queries_changing_anomaly_detection_algorithm" {
|
variable "queries_changing_anomaly_detection_algorithm" {
|
||||||
description = "Anomaly Detection Algorithm used"
|
description = "Anomaly Detection Algorithm used"
|
||||||
type = "string"
|
type = "string"
|
||||||
default = "robust"
|
default = "agile"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "queries_changing_deviations" {
|
variable "queries_changing_deviations" {
|
||||||
@ -174,6 +174,12 @@ variable "queries_changing_threshold_critical" {
|
|||||||
default = 1
|
default = 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
variable "queries_changing_threshold_critical_recovery" {
|
||||||
|
description = "Queries Changing critical recovery threshold"
|
||||||
|
type = "string"
|
||||||
|
default = 0.99
|
||||||
|
}
|
||||||
|
|
||||||
variable "queries_changing_silenced" {
|
variable "queries_changing_silenced" {
|
||||||
description = "Groups to mute for GCP Cloud SQL Queries Changing monitor"
|
description = "Groups to mute for GCP Cloud SQL Queries Changing monitor"
|
||||||
type = "map"
|
type = "map"
|
||||||
@ -198,7 +204,7 @@ variable "questions_changing_message" {
|
|||||||
variable "questions_changing_timeframe" {
|
variable "questions_changing_timeframe" {
|
||||||
description = "Timeframe for the Questions Changing monitor"
|
description = "Timeframe for the Questions Changing monitor"
|
||||||
type = "string"
|
type = "string"
|
||||||
default = "last_10m"
|
default = "last_1h"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "questions_changing_database_ids" {
|
variable "questions_changing_database_ids" {
|
||||||
@ -210,7 +216,7 @@ variable "questions_changing_database_ids" {
|
|||||||
variable "questions_changing_anomaly_detection_algorithm" {
|
variable "questions_changing_anomaly_detection_algorithm" {
|
||||||
description = "Anomaly Detection Algorithm used"
|
description = "Anomaly Detection Algorithm used"
|
||||||
type = "string"
|
type = "string"
|
||||||
default = "robust"
|
default = "agile"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "questions_changing_deviations" {
|
variable "questions_changing_deviations" {
|
||||||
@ -243,6 +249,12 @@ variable "questions_changing_threshold_critical" {
|
|||||||
default = 1
|
default = 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
variable "questions_changing_threshold_critical_recovery" {
|
||||||
|
description = "Questions Changing critical recovery threshold"
|
||||||
|
type = "string"
|
||||||
|
default = 0.99
|
||||||
|
}
|
||||||
|
|
||||||
variable "questions_changing_silenced" {
|
variable "questions_changing_silenced" {
|
||||||
description = "Groups to mute for GCP Cloud SQL Questions Changing monitor"
|
description = "Groups to mute for GCP Cloud SQL Questions Changing monitor"
|
||||||
type = "map"
|
type = "map"
|
||||||
|
|||||||
@ -57,7 +57,7 @@ EOF
|
|||||||
# Replication Lag
|
# Replication Lag
|
||||||
#
|
#
|
||||||
resource "datadog_monitor" "replication_lag" {
|
resource "datadog_monitor" "replication_lag" {
|
||||||
name = "[${var.environment}] Cloud SQL MySQL Replication Lag too high"
|
name = "[${var.environment}] Cloud SQL MySQL Replication Lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}"
|
||||||
message = "${coalesce(var.replication_lag_message, var.message)}"
|
message = "${coalesce(var.replication_lag_message, var.message)}"
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -74,15 +74,18 @@ EOF
|
|||||||
warning = "${var.replication_lag_threshold_warning}"
|
warning = "${var.replication_lag_threshold_warning}"
|
||||||
}
|
}
|
||||||
|
|
||||||
notify_no_data = true
|
|
||||||
require_full_window = false
|
|
||||||
renotify_interval = 0
|
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
|
locked = false
|
||||||
timeout_h = 0
|
timeout_h = 0
|
||||||
include_tags = true
|
include_tags = true
|
||||||
locked = false
|
no_data_timeframe = 25
|
||||||
|
require_full_window = false
|
||||||
|
notify_no_data = true
|
||||||
|
renotify_interval = 0
|
||||||
|
|
||||||
evaluation_delay = "${var.delay}"
|
evaluation_delay = "${var.delay}"
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
|
|
||||||
silenced = "${var.replication_lag_silenced}"
|
silenced = "${var.replication_lag_silenced}"
|
||||||
|
|
||||||
tags = [
|
tags = [
|
||||||
@ -99,9 +102,7 @@ EOF
|
|||||||
# Queries Anomaly
|
# Queries Anomaly
|
||||||
#
|
#
|
||||||
resource "datadog_monitor" "queries_changing_anomaly" {
|
resource "datadog_monitor" "queries_changing_anomaly" {
|
||||||
count = "${length(var.queries_changing_database_ids)}"
|
name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally"
|
||||||
|
|
||||||
name = "[${var.environment}] [${var.queries_changing_database_ids[count.index]}] Cloud SQL MySQL Queries Count changed abnormally {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
|
||||||
message = "${coalesce(var.queries_changing_message, var.message)}"
|
message = "${coalesce(var.queries_changing_message, var.message)}"
|
||||||
|
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -109,12 +110,13 @@ resource "datadog_monitor" "queries_changing_anomaly" {
|
|||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(${var.queries_changing_timeframe}):
|
avg(${var.queries_changing_timeframe}):
|
||||||
anomalies(
|
anomalies(
|
||||||
default(
|
avg:gcp.cloudsql.database.mysql.queries{${data.template_file.filter.rendered}} by {database_id}.as_count()
|
||||||
avg:gcp.cloudsql.database.mysql.queries{project_id:${var.project_id},database_id:${var.project_id}:${var.queries_changing_database_ids[count.index]}},
|
|
||||||
0),
|
|
||||||
'${var.queries_changing_anomaly_detection_algorithm}',
|
'${var.queries_changing_anomaly_detection_algorithm}',
|
||||||
${var.queries_changing_deviations},
|
${var.queries_changing_deviations},
|
||||||
direction='${var.queries_changing_direction}',
|
direction='${var.queries_changing_direction}',
|
||||||
|
alert_window='last_30m',
|
||||||
|
interval=20,
|
||||||
|
count_default_zero='false',
|
||||||
seasonality='${var.queries_changing_seasonality}'
|
seasonality='${var.queries_changing_seasonality}'
|
||||||
)
|
)
|
||||||
> ${var.queries_changing_threshold_critical}
|
> ${var.queries_changing_threshold_critical}
|
||||||
@ -123,17 +125,20 @@ EOF
|
|||||||
thresholds {
|
thresholds {
|
||||||
warning = "${var.queries_changing_threshold_warning}"
|
warning = "${var.queries_changing_threshold_warning}"
|
||||||
critical = "${var.queries_changing_threshold_critical}"
|
critical = "${var.queries_changing_threshold_critical}"
|
||||||
|
critical_recovery = "${var.queries_changing_threshold_critical_recovery}"
|
||||||
}
|
}
|
||||||
|
|
||||||
notify_no_data = false
|
|
||||||
require_full_window = false
|
|
||||||
renotify_interval = 0
|
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
|
locked = false
|
||||||
timeout_h = 0
|
timeout_h = 0
|
||||||
include_tags = true
|
include_tags = true
|
||||||
locked = false
|
require_full_window = false
|
||||||
|
notify_no_data = false
|
||||||
|
renotify_interval = 0
|
||||||
|
|
||||||
evaluation_delay = "${var.delay}"
|
evaluation_delay = "${var.delay}"
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
|
|
||||||
silenced = "${var.queries_changing_silenced}"
|
silenced = "${var.queries_changing_silenced}"
|
||||||
|
|
||||||
tags = [
|
tags = [
|
||||||
@ -142,7 +147,6 @@ EOF
|
|||||||
"env:${var.environment}",
|
"env:${var.environment}",
|
||||||
"resource:cloud-sql",
|
"resource:cloud-sql",
|
||||||
"engine:mysql",
|
"engine:mysql",
|
||||||
"database_id:${var.project_id}:${var.queries_changing_database_ids[count.index]}}",
|
|
||||||
"${var.queries_changing_extra_tags}",
|
"${var.queries_changing_extra_tags}",
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@ -151,22 +155,21 @@ EOF
|
|||||||
# Questions Anomaly
|
# Questions Anomaly
|
||||||
#
|
#
|
||||||
resource "datadog_monitor" "questions_changing_anomaly" {
|
resource "datadog_monitor" "questions_changing_anomaly" {
|
||||||
count = "${length(var.questions_changing_database_ids)}"
|
name = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally"
|
||||||
|
|
||||||
name = "[${var.environment}] [${var.questions_changing_database_ids[count.index]}] Cloud SQL MySQL Questions Count changed abnormally {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
|
||||||
message = "${coalesce(var.questions_changing_message, var.message)}"
|
message = "${coalesce(var.questions_changing_message, var.message)}"
|
||||||
|
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(${var.questions_changing_timeframe}):
|
avg(last_1h):
|
||||||
anomalies(
|
anomalies(
|
||||||
default(
|
avg:gcp.cloudsql.database.mysql.questions{${data.template_file.filter.rendered}} by {database_id},
|
||||||
avg:gcp.cloudsql.database.mysql.questions{project_id:${var.project_id},database_id:${var.project_id}:${var.questions_changing_database_ids[count.index]}},
|
|
||||||
0),
|
|
||||||
'${var.questions_changing_anomaly_detection_algorithm}',
|
'${var.questions_changing_anomaly_detection_algorithm}',
|
||||||
${var.questions_changing_deviations},
|
${var.questions_changing_deviations},
|
||||||
direction='${var.questions_changing_direction}',
|
direction='${var.questions_changing_direction}',
|
||||||
|
alert_window='last_30m',
|
||||||
|
interval=20,
|
||||||
|
count_default_zero='false',
|
||||||
seasonality='${var.questions_changing_seasonality}'
|
seasonality='${var.questions_changing_seasonality}'
|
||||||
)
|
)
|
||||||
> ${var.questions_changing_threshold_critical}
|
> ${var.questions_changing_threshold_critical}
|
||||||
@ -175,17 +178,20 @@ EOF
|
|||||||
thresholds {
|
thresholds {
|
||||||
warning = "${var.questions_changing_threshold_warning}"
|
warning = "${var.questions_changing_threshold_warning}"
|
||||||
critical = "${var.questions_changing_threshold_critical}"
|
critical = "${var.questions_changing_threshold_critical}"
|
||||||
|
critical_recovery = "${var.questions_changing_threshold_critical_recovery}"
|
||||||
}
|
}
|
||||||
|
|
||||||
notify_no_data = false
|
|
||||||
require_full_window = false
|
|
||||||
renotify_interval = 0
|
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
|
locked = false
|
||||||
timeout_h = 0
|
timeout_h = 0
|
||||||
include_tags = true
|
include_tags = true
|
||||||
locked = false
|
require_full_window = false
|
||||||
|
notify_no_data = false
|
||||||
|
renotify_interval = 0
|
||||||
|
|
||||||
evaluation_delay = "${var.delay}"
|
evaluation_delay = "${var.delay}"
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
|
|
||||||
silenced = "${var.questions_changing_silenced}"
|
silenced = "${var.questions_changing_silenced}"
|
||||||
|
|
||||||
tags = [
|
tags = [
|
||||||
@ -194,7 +200,6 @@ EOF
|
|||||||
"env:${var.environment}",
|
"env:${var.environment}",
|
||||||
"resource:cloud-sql",
|
"resource:cloud-sql",
|
||||||
"engine:mysql",
|
"engine:mysql",
|
||||||
"database_id:${var.project_id}:${var.questions_changing_database_ids[count.index]}",
|
|
||||||
"${var.questions_changing_extra_tags}",
|
"${var.questions_changing_extra_tags}",
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user