MON-122 add aurora

This commit is contained in:
Quentin Manfroi 2018-09-14 19:30:39 +02:00
parent 7877118023
commit 81c523743b
4 changed files with 191 additions and 0 deletions

View File

@ -0,0 +1,61 @@
# Global Terraform
# Environment name supplied by the caller. No default is declared, so a value
# must always be provided. The replica lag monitor interpolates it into its
# title and into its "env:" tag.
variable "environment" {
  type        = "string"
  description = "Architecture Environment"
}
# Global DataDog
# Metric evaluation delay, expressed in seconds. Falls back to 900 when the
# caller does not override it. No explicit type is declared.
variable "evaluation_delay" {
  default     = 900
  description = "Delay in seconds for the metric evaluation"
}
# Delay, in seconds, before a newly appearing resource is monitored. Falls back
# to 300 when the caller does not override it. No explicit type is declared.
variable "new_host_delay" {
  default     = 300
  description = "Delay in seconds before monitor new resource"
}
# Generic notification message. No default is declared, so a value must always
# be provided. The replica lag monitor uses it whenever
# aurora_replicalag_message is left empty (see the coalesce() in the monitor).
variable "message" {
  description = "Message sent when an alert is triggered"
}
# Switch for the default filter-tag convention. The default is the quoted
# string "true", not a bare boolean literal.
# NOTE(review): presumably read by the data.template_file.filter data source
# that the monitor query interpolates - confirm where that is defined.
variable "filter_tags_use_defaults" {
  default     = "true"
  description = "Use default filter tags convention"
}
# Custom filter expression, defaulting to the wildcard "*".
# NOTE(review): presumably read by the data.template_file.filter data source
# that the monitor query interpolates - confirm where that is defined.
variable "filter_tags_custom" {
  default     = "*"
  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
}
# AWS RDS Aurora instance specific
# Map handed unchanged to the replica lag monitor's "silenced" argument.
# Defaults to an empty map.
variable "aurora_replicalag_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for RDS Aurora replica lag monitor"
}
# Optional monitor-specific message. It is the first argument of the monitor's
# coalesce(), so the default empty string lets the generic "message" variable
# take over.
variable "aurora_replicalag_message" {
  type        = "string"
  default     = ""
  description = "Custom message for RDS Aurora replica lag monitor"
}
# Evaluation window of the replica lag monitor. The value is interpolated into
# the query as "avg(<timeframe>): ...". Defaults to "last_5m".
variable "aurora_replicalag_timeframe" {
  type        = "string"
  default     = "last_5m"
  description = "Monitor timeframe for RDS Aurora replica lag monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
# Warning threshold for replica lag, in milliseconds per the description.
# Used as thresholds.warning by the monitor. The default is the quoted
# string "100".
variable "aurora_replicalag_threshold_warning" {
  default     = "100"
  description = "Aurora replica lag in milliseconds (warning threshold)"
}
# Critical threshold for replica lag, in milliseconds per the description.
# Used both as thresholds.critical and as the right-hand side of the comparison
# in the monitor query. The default is the quoted string "200".
variable "aurora_replicalag_threshold_critical" {
  default     = "200"
  description = "Aurora replica lag in milliseconds (critical threshold)"
}

View File

@ -0,0 +1,34 @@
### RDS Aurora Mysql Replica Lag monitor ###
# Datadog metric alert on Aurora MySQL replica lag, evaluated per {region,name}
# group.
#
# The query averages aws.rds.aurora_replica_lag over
# var.aurora_replicalag_timeframe and compares it with the critical threshold;
# the warning threshold is carried by the thresholds block only.
#
# NOTE(review): data.template_file.filter and var.aurora_cluster_type are
# referenced here but are not declared in the inputs shipped next to this
# monitor - confirm they are defined elsewhere in the module.
resource "datadog_monitor" "rds_aurora_mysql_replica_lag" {
  # The lag value is reported in milliseconds, so the title shows "ms" for the
  # current value as well as for the threshold.
  name = "[${var.environment}] RDS Aurora Mysql replica lag {{#is_alert}}{{{comparator}}} {{threshold}} ms ({{value}} ms){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ms ({{value}} ms){{/is_warning}}"

  # The monitor-specific message wins; when it is empty the generic message is used.
  message = "${coalesce(var.aurora_replicalag_message, var.message)}"

  type = "metric alert"

  # Heredoc body is kept at column 0 so the rendered query carries no extra
  # leading whitespace. The metric is aws.rds.aurora_replica_lag (the
  # "aws.rds." namespace appears once).
  query = <<EOF
avg(${var.aurora_replicalag_timeframe}): (
avg:aws.rds.aurora_replica_lag{${data.template_file.filter.rendered}} by {region,name}
) > ${var.aurora_replicalag_threshold_critical}
EOF

  thresholds {
    warning  = "${var.aurora_replicalag_threshold_warning}"
    critical = "${var.aurora_replicalag_threshold_critical}"
  }

  notify_no_data = true

  # Use the dedicated input declared for the evaluation delay.
  evaluation_delay = "${var.evaluation_delay}"

  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = false

  # Use the dedicated input declared for the new-host delay.
  new_host_delay = "${var.new_host_delay}"

  silenced = "${var.aurora_replicalag_silenced}"
  tags     = ["env:${var.environment}", "resource:rds", "team:aws", "provider:aws"]

  # Instantiate the monitor only when the cluster type is "mysql".
  count = "${var.aurora_cluster_type == "mysql" ? 1 : 0}"
}

View File

@ -0,0 +1,61 @@
# Global Terraform
# Environment name supplied by the caller. No default is declared, so a value
# must always be provided. The replica lag monitor interpolates it into its
# title and into its "env:" tag.
variable "environment" {
  type        = "string"
  description = "Architecture Environment"
}
# Global DataDog
# Metric evaluation delay, expressed in seconds. Falls back to 900 when the
# caller does not override it. No explicit type is declared.
variable "evaluation_delay" {
  default     = 900
  description = "Delay in seconds for the metric evaluation"
}
# Delay, in seconds, before a newly appearing resource is monitored. Falls back
# to 300 when the caller does not override it. No explicit type is declared.
variable "new_host_delay" {
  default     = 300
  description = "Delay in seconds before monitor new resource"
}
# Generic notification message. No default is declared, so a value must always
# be provided. The replica lag monitor uses it whenever
# aurora_replicalag_message is left empty (see the coalesce() in the monitor).
variable "message" {
  description = "Message sent when an alert is triggered"
}
# Switch for the default filter-tag convention. The default is the quoted
# string "true", not a bare boolean literal.
# NOTE(review): presumably read by the data.template_file.filter data source
# that the monitor query interpolates - confirm where that is defined.
variable "filter_tags_use_defaults" {
  default     = "true"
  description = "Use default filter tags convention"
}
# Custom filter expression, defaulting to the wildcard "*".
# NOTE(review): presumably read by the data.template_file.filter data source
# that the monitor query interpolates - confirm where that is defined.
variable "filter_tags_custom" {
  default     = "*"
  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
}
# AWS RDS Aurora instance specific
# Map handed unchanged to the replica lag monitor's "silenced" argument.
# Defaults to an empty map.
variable "aurora_replicalag_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for RDS Aurora replica lag monitor"
}
# Optional monitor-specific message. It is the first argument of the monitor's
# coalesce(), so the default empty string lets the generic "message" variable
# take over.
variable "aurora_replicalag_message" {
  type        = "string"
  default     = ""
  description = "Custom message for RDS Aurora replica lag monitor"
}
# Evaluation window of the replica lag monitor. The value is interpolated into
# the query as "avg(<timeframe>): ...". Defaults to "last_5m".
variable "aurora_replicalag_timeframe" {
  type        = "string"
  default     = "last_5m"
  description = "Monitor timeframe for RDS Aurora replica lag monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
# Warning threshold for replica lag, in milliseconds per the description.
# Used as thresholds.warning by the monitor. The default is the quoted
# string "100".
variable "aurora_replicalag_threshold_warning" {
  default     = "100"
  description = "Aurora replica lag in milliseconds (warning threshold)"
}
# Critical threshold for replica lag, in milliseconds per the description.
# Used both as thresholds.critical and as the right-hand side of the comparison
# in the monitor query. The default is the quoted string "200".
variable "aurora_replicalag_threshold_critical" {
  default     = "200"
  description = "Aurora replica lag in milliseconds (critical threshold)"
}

View File

@ -0,0 +1,35 @@
### RDS Aurora Postgresql Replica Lag monitor ###
# Datadog metric alert on Aurora PostgreSQL replica lag, evaluated per
# {region,name} group.
#
# The query averages the replica lag metric over
# var.aurora_replicalag_timeframe and compares it with the critical threshold;
# the warning threshold is carried by the thresholds block only.
#
# NOTE(review): data.template_file.filter and var.aurora_cluster_type are
# referenced here but are not declared in the inputs shipped next to this
# monitor - confirm they are defined elsewhere in the module.
resource "datadog_monitor" "rds_aurora_postgresql_replica_lag" {
  # The lag value is reported in milliseconds, so the title shows "ms" for the
  # current value as well as for the threshold.
  name = "[${var.environment}] RDS Aurora PostgreSQL replica lag {{#is_alert}}{{{comparator}}} {{threshold}} ms ({{value}} ms){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ms ({{value}} ms){{/is_warning}}"

  # The monitor-specific message wins; when it is empty the generic message is used.
  message = "${coalesce(var.aurora_replicalag_message, var.message)}"

  type = "metric alert"

  # Heredoc body is kept at column 0 so the rendered query carries no extra
  # leading whitespace.
  # NOTE(review): the metric spelling "rdsto_aurora_postgre_sqlreplica_lag"
  # looks odd but appears to be the name exposed by Datadog's AWS RDS
  # integration - verify against the integration's metric list before changing.
  query = <<EOF
avg(${var.aurora_replicalag_timeframe}): (
avg:aws.rds.rdsto_aurora_postgre_sqlreplica_lag{${data.template_file.filter.rendered}} by {region,name}
) > ${var.aurora_replicalag_threshold_critical}
EOF

  thresholds {
    warning  = "${var.aurora_replicalag_threshold_warning}"
    critical = "${var.aurora_replicalag_threshold_critical}"
  }

  notify_no_data = true

  # Use the dedicated input declared for the evaluation delay.
  evaluation_delay = "${var.evaluation_delay}"

  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = false

  # Use the dedicated input declared for the new-host delay.
  new_host_delay = "${var.new_host_delay}"

  silenced = "${var.aurora_replicalag_silenced}"
  tags     = ["env:${var.environment}", "resource:rds", "team:aws", "provider:aws"]

  # Instantiate the monitor only when the cluster type is "postgresql".
  count = "${var.aurora_cluster_type == "postgresql" ? 1 : 0}"
}