MON-459: Bump to HCL 2 syntax / tf 0.12.1
This commit is contained in:
parent
d82101c06c
commit
b093f2f1e4
@ -11,7 +11,7 @@ before_script:
|
||||
- apk add --no-cache curl
|
||||
|
||||
auto_update:
|
||||
image: hashicorp/terraform:0.11.14
|
||||
image: hashicorp/terraform:0.12.1
|
||||
stage: test
|
||||
script:
|
||||
- apk add --no-cache bash git grep coreutils
|
||||
|
||||
@ -42,25 +42,25 @@ variable "prefix_slug" {
|
||||
|
||||
variable "ark_schedules_monitor_message" {
|
||||
description = "Custom message for Ark schedules monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "ark_schedules_monitor_timeframe" {
|
||||
description = "Monitor timeframe for Ark schedules monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_1d"
|
||||
}
|
||||
|
||||
variable "ark_schedules_enabled" {
|
||||
description = "Flag to enable Ark schedules monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "ark_schedules_extra_tags" {
|
||||
description = "Extra tags for Ark schedules monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
@ -68,3 +68,4 @@ variable "ark_schedules_monitor_no_data_timeframe" {
|
||||
description = "No data timeframe in minutes"
|
||||
default = 1440
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "ark"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -1,29 +1,30 @@
|
||||
resource "datadog_monitor" "ark_schedules_monitor" {
|
||||
count = "${var.ark_schedules_enabled == "true" ? 1 : 0}"
|
||||
count = var.ark_schedules_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Ark backup failed"
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.ark_schedules_monitor_message, var.message)}"
|
||||
message = coalesce(var.ark_schedules_monitor_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
sum(${var.ark_schedules_monitor_timeframe}):min:ark.ark_backup_failure_total${module.filter-tags.query_alert} by {schedule}.as_count() > 1
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
thresholds = {
|
||||
critical = 1
|
||||
warning = 0
|
||||
warning = 0
|
||||
}
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
no_data_timeframe = "${var.ark_schedules_monitor_no_data_timeframe}"
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
no_data_timeframe = var.ark_schedules_monitor_no_data_timeframe
|
||||
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:prometheus", "resource:ark", "team:claranet", "created-by:terraform", "${var.ark_schedules_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:prometheus", "resource:ark", "team:claranet", "created-by:terraform", var.ark_schedules_extra_tags]
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
output "ark_schedules_monitor_id" {
|
||||
description = "id for monitor ark_schedules_monitor"
|
||||
value = "${datadog_monitor.ark_schedules_monitor.*.id}"
|
||||
value = datadog_monitor.ark_schedules_monitor.*.id
|
||||
}
|
||||
|
||||
|
||||
4
caas/kubernetes/ark/versions.tf
Normal file
4
caas/kubernetes/ark/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -42,24 +42,25 @@ variable "prefix_slug" {
|
||||
|
||||
variable "apiserver_enabled" {
|
||||
description = "Flag to enable API server monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "apiserver_extra_tags" {
|
||||
description = "Extra tags for API server monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "apiserver_message" {
|
||||
description = "Custom message for API server monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "apiserver_threshold_warning" {
|
||||
description = "API server monitor (warning threshold)"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 3
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "kubernetes"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -1,27 +1,28 @@
|
||||
resource "datadog_monitor" "apiserver" {
|
||||
count = "${var.apiserver_enabled == "true" ? 1 : 0}"
|
||||
count = var.apiserver_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes API server does not respond"
|
||||
message = "${coalesce(var.apiserver_message, var.message)}"
|
||||
message = coalesce(var.apiserver_message, var.message)
|
||||
|
||||
type = "service check"
|
||||
|
||||
query = <<EOQ
|
||||
"kube_apiserver_controlplane.up"${module.filter-tags.service_check}.last(6).count_by_status()
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds = {
|
||||
warning = "${var.apiserver_threshold_warning}"
|
||||
warning = var.apiserver_threshold_warning
|
||||
critical = 5
|
||||
}
|
||||
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", "${var.apiserver_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", var.apiserver_extra_tags]
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
output "apiserver_id" {
|
||||
description = "id for monitor apiserver"
|
||||
value = "${datadog_monitor.apiserver.*.id}"
|
||||
value = datadog_monitor.apiserver.*.id
|
||||
}
|
||||
|
||||
|
||||
4
caas/kubernetes/cluster/versions.tf
Normal file
4
caas/kubernetes/cluster/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
# Global Terraform
|
||||
variable "environment" {
|
||||
description = "Architecture Environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
# Global DataDog
|
||||
@ -43,13 +43,13 @@ variable "filter_tags_custom_excluded" {
|
||||
|
||||
variable "ingress_5xx_enabled" {
|
||||
description = "Flag to enable Ingress 5xx errors monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "ingress_5xx_extra_tags" {
|
||||
description = "Extra tags for Ingress 5xx errors monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
@ -60,37 +60,37 @@ variable "ingress_5xx_message" {
|
||||
|
||||
variable "ingress_5xx_time_aggregator" {
|
||||
description = "Monitor aggregator for Ingress 5xx errors [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "ingress_5xx_timeframe" {
|
||||
description = "Monitor timeframe for Ingress 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "ingress_5xx_threshold_critical" {
|
||||
type = "string"
|
||||
type = string
|
||||
default = "20"
|
||||
description = "5xx critical threshold in percentage"
|
||||
}
|
||||
|
||||
variable "ingress_5xx_threshold_warning" {
|
||||
type = "string"
|
||||
type = string
|
||||
default = "10"
|
||||
description = "5xx warning threshold in percentage"
|
||||
}
|
||||
|
||||
variable "ingress_4xx_enabled" {
|
||||
description = "Flag to enable Ingress 4xx errors monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "ingress_4xx_extra_tags" {
|
||||
description = "Extra tags for Ingress 4xx errors monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
@ -101,24 +101,24 @@ variable "ingress_4xx_message" {
|
||||
|
||||
variable "ingress_4xx_time_aggregator" {
|
||||
description = "Monitor aggregator for Ingress 4xx errors [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "ingress_4xx_timeframe" {
|
||||
description = "Monitor timeframe for Ingress 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "ingress_4xx_threshold_critical" {
|
||||
type = "string"
|
||||
type = string
|
||||
default = "40"
|
||||
description = "4xx critical threshold in percentage"
|
||||
}
|
||||
|
||||
variable "ingress_4xx_threshold_warning" {
|
||||
type = "string"
|
||||
type = string
|
||||
default = "20"
|
||||
description = "4xx warning threshold in percentage"
|
||||
}
|
||||
@ -127,3 +127,4 @@ variable "artificial_requests_count" {
|
||||
default = 5
|
||||
description = "Number of false requests used to mitigate false positives in case of low traffic"
|
||||
}
|
||||
|
||||
|
||||
@ -1,22 +1,22 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "ingress"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
extra_tags_excluded = ["upstream:upstream-default-backend"]
|
||||
}
|
||||
|
||||
module "filter-tags-5xx" {
|
||||
source = "../../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "ingress"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
extra_tags = ["status_code:5xx"]
|
||||
extra_tags_excluded = ["upstream:upstream-default-backend"]
|
||||
}
|
||||
@ -24,11 +24,12 @@ module "filter-tags-5xx" {
|
||||
module "filter-tags-4xx" {
|
||||
source = "../../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "ingress"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
extra_tags = ["status_code:4xx"]
|
||||
extra_tags_excluded = ["upstream:upstream-default-backend"]
|
||||
}
|
||||
|
||||
|
||||
@ -1,63 +1,64 @@
|
||||
resource "datadog_monitor" "nginx_ingress_too_many_5xx" {
|
||||
count = "${var.ingress_5xx_enabled == "true" ? 1 : 0}"
|
||||
count = var.ingress_5xx_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Nginx Ingress 5xx errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.ingress_5xx_message, var.message)}"
|
||||
message = coalesce(var.ingress_5xx_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
${var.ingress_5xx_time_aggregator}(${var.ingress_5xx_timeframe}): default(
|
||||
sum:nginx_ingress.nginx_upstream_responses_total${module.filter-tags-5xx.query_alert} by {upstream,ingress_class}.as_rate() /
|
||||
(sum:nginx_ingress.nginx_upstream_requests_total${module.filter-tags.query_alert} by {upstream,ingress_class}.as_rate() + ${var.artificial_requests_count})
|
||||
* 100, 0) > ${var.ingress_5xx_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
type = "query alert"
|
||||
|
||||
thresholds {
|
||||
warning = "${var.ingress_5xx_threshold_warning}"
|
||||
critical = "${var.ingress_5xx_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.ingress_5xx_threshold_warning
|
||||
critical = var.ingress_5xx_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:prometheus", "resource:nginx-ingress-controller", "team:claranet", "created-by:terraform", "${var.ingress_5xx_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:prometheus", "resource:nginx-ingress-controller", "team:claranet", "created-by:terraform", var.ingress_5xx_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "nginx_ingress_too_many_4xx" {
|
||||
count = "${var.ingress_4xx_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Nginx Ingress 4xx errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.ingress_4xx_message, var.message)}"
|
||||
count = var.ingress_4xx_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Nginx Ingress 4xx errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.ingress_4xx_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
${var.ingress_4xx_time_aggregator}(${var.ingress_4xx_timeframe}): default(
|
||||
sum:nginx_ingress.nginx_upstream_responses_total${module.filter-tags-4xx.query_alert} by {upstream,ingress_class}.as_rate() /
|
||||
(sum:nginx_ingress.nginx_upstream_requests_total${module.filter-tags.query_alert} by {upstream,ingress_class}.as_rate() + ${var.artificial_requests_count})
|
||||
* 100, 0) > ${var.ingress_4xx_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
type = "query alert"
|
||||
type = "query alert"
|
||||
|
||||
thresholds {
|
||||
warning = "${var.ingress_4xx_threshold_warning}"
|
||||
critical = "${var.ingress_4xx_threshold_critical}"
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:prometheus", "resource:nginx-ingress-controller", "team:claranet", "created-by:terraform", "${var.ingress_4xx_extra_tags}"]
|
||||
thresholds = {
|
||||
warning = var.ingress_4xx_threshold_warning
|
||||
critical = var.ingress_4xx_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:prometheus", "resource:nginx-ingress-controller", "team:claranet", "created-by:terraform", var.ingress_4xx_extra_tags]
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
output "nginx_ingress_too_many_5xx_id" {
|
||||
description = "id for monitor nginx_ingress_too_many_5xx"
|
||||
value = "${datadog_monitor.nginx_ingress_too_many_5xx.*.id}"
|
||||
value = datadog_monitor.nginx_ingress_too_many_5xx.*.id
|
||||
}
|
||||
|
||||
output "nginx_ingress_too_many_4xx_id" {
|
||||
description = "id for monitor nginx_ingress_too_many_4xx"
|
||||
value = "${datadog_monitor.nginx_ingress_too_many_4xx.*.id}"
|
||||
value = datadog_monitor.nginx_ingress_too_many_4xx.*.id
|
||||
}
|
||||
|
||||
|
||||
4
caas/kubernetes/ingress/vts/versions.tf
Normal file
4
caas/kubernetes/ingress/vts/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -42,175 +42,175 @@ variable "prefix_slug" {
|
||||
|
||||
variable "disk_pressure_enabled" {
|
||||
description = "Flag to enable Disk pressure monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "disk_pressure_extra_tags" {
|
||||
description = "Extra tags for Disk pressure monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "disk_pressure_message" {
|
||||
description = "Custom message for Disk pressure monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "disk_pressure_threshold_warning" {
|
||||
description = "Disk pressure monitor (warning threshold)"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 3
|
||||
}
|
||||
|
||||
variable "disk_out_enabled" {
|
||||
description = "Flag to enable Out of disk monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "disk_out_extra_tags" {
|
||||
description = "Extra tags for Out of disk monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "disk_out_message" {
|
||||
description = "Custom message for Out of disk monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "disk_out_threshold_warning" {
|
||||
description = "Out of disk monitor (warning threshold)"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 3
|
||||
}
|
||||
|
||||
variable "memory_pressure_enabled" {
|
||||
description = "Flag to enable Memory pressure monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "memory_pressure_extra_tags" {
|
||||
description = "Extra tags for Memory pressure monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "memory_pressure_message" {
|
||||
description = "Custom message for Memory pressure monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "memory_pressure_threshold_warning" {
|
||||
description = "Memory pressure monitor (warning threshold)"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 3
|
||||
}
|
||||
|
||||
variable "ready_enabled" {
|
||||
description = "Flag to enable Node ready monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "ready_extra_tags" {
|
||||
description = "Extra tags for Node ready monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "ready_message" {
|
||||
description = "Custom message for Node ready monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "ready_threshold_warning" {
|
||||
description = "Node ready monitor (warning threshold)"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 3
|
||||
}
|
||||
|
||||
variable "kubelet_ping_enabled" {
|
||||
description = "Flag to enable Kubelet ping monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "kubelet_ping_extra_tags" {
|
||||
description = "Extra tags for Kubelet ping monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "kubelet_ping_message" {
|
||||
description = "Custom message for Kubelet ping monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "kubelet_ping_threshold_warning" {
|
||||
description = "Kubelet ping monitor (warning threshold)"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 3
|
||||
}
|
||||
|
||||
variable "kubelet_syncloop_enabled" {
|
||||
description = "Flag to enable Kubelet sync loop monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "kubelet_syncloop_extra_tags" {
|
||||
description = "Extra tags for Kubelet sync loop monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "kubelet_syncloop_message" {
|
||||
description = "Custom message for Kubelet sync loop monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "kubelet_syncloop_threshold_warning" {
|
||||
description = "Kubelet sync loop monitor (warning threshold)"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 3
|
||||
}
|
||||
|
||||
variable "unregister_net_device_enabled" {
|
||||
description = "Flag to enable Unregister net device monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "unregister_net_device_extra_tags" {
|
||||
description = "Extra tags for Unregister net device monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "unregister_net_device_message" {
|
||||
description = "Custom message for Unregister net device monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "unregister_net_device_time_aggregator" {
|
||||
description = "Monitor aggregator for Unregister net device [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "unregister_net_device_timeframe" {
|
||||
description = "Monitor timeframe for Unregister net device [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "15m"
|
||||
}
|
||||
|
||||
@ -221,61 +221,61 @@ variable "unregister_net_device_threshold_critical" {
|
||||
|
||||
variable "node_unschedulable_enabled" {
|
||||
description = "Flag to enable node unschedulable monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "node_unschedulable_extra_tags" {
|
||||
description = "Extra tags for node unschedulable monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "node_unschedulable_message" {
|
||||
description = "Custom message for node unschedulable monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "node_unschedulable_time_aggregator" {
|
||||
description = "Monitor aggregator for node unschedulable [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "node_unschedulable_timeframe" {
|
||||
description = "Monitor timeframe for node unschedulable [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_1h"
|
||||
}
|
||||
|
||||
variable "volume_space_enabled" {
|
||||
description = "Flag to enable Volume space monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "volume_space_extra_tags" {
|
||||
description = "Extra tags for Volume space monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "volume_space_message" {
|
||||
description = "Custom message for Volume space monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "volume_space_time_aggregator" {
|
||||
description = "Monitor aggregator for Volume space [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "volume_space_timeframe" {
|
||||
description = "Monitor timeframe for Volume space [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -291,31 +291,31 @@ variable "volume_space_threshold_warning" {
|
||||
|
||||
variable "volume_inodes_enabled" {
|
||||
description = "Flag to enable Volume inodes monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "volume_inodes_extra_tags" {
|
||||
description = "Extra tags for Volume inodes monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "volume_inodes_message" {
|
||||
description = "Custom message for Volume inodes monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "volume_inodes_time_aggregator" {
|
||||
description = "Monitor aggregator for Volume inodes [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "volume_inodes_timeframe" {
|
||||
description = "Monitor timeframe for Volume inodes [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -328,3 +328,4 @@ variable "volume_inodes_threshold_warning" {
|
||||
default = 90
|
||||
description = "Volume inodes warning threshold"
|
||||
}
|
||||
|
||||
|
||||
@ -1,20 +1,21 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "kubernetes"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
module "filter-tags-unschedulable" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "kubernetes"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
extra_tags = ["status:unschedulable"]
|
||||
}
|
||||
|
||||
|
||||
@ -1,104 +1,104 @@
|
||||
resource "datadog_monitor" "disk_pressure" {
|
||||
count = "${var.disk_pressure_enabled == "true" ? 1 : 0}"
|
||||
count = var.disk_pressure_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Disk pressure"
|
||||
message = "${coalesce(var.disk_pressure_message, var.message)}"
|
||||
message = coalesce(var.disk_pressure_message, var.message)
|
||||
|
||||
type = "service check"
|
||||
|
||||
query = <<EOQ
|
||||
"kubernetes_state.node.disk_pressure"${module.filter-tags.service_check}.by("kubernetescluster","node").last(6).count_by_status()
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds = {
|
||||
warning = "${var.disk_pressure_threshold_warning}"
|
||||
warning = var.disk_pressure_threshold_warning
|
||||
critical = 5
|
||||
}
|
||||
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", "${var.disk_pressure_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", var.disk_pressure_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "disk_out" {
|
||||
count = "${var.disk_out_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Out of disk"
|
||||
message = "${coalesce(var.disk_out_message, var.message)}"
|
||||
count = var.disk_out_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Out of disk"
|
||||
message = coalesce(var.disk_out_message, var.message)
|
||||
|
||||
type = "service check"
|
||||
|
||||
query = <<EOQ
|
||||
"kubernetes_state.node.out_of_disk"${module.filter-tags.service_check}.by("kubernetescluster","node").last(6).count_by_status()
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds = {
|
||||
warning = "${var.disk_out_threshold_warning}"
|
||||
critical = 5
|
||||
}
|
||||
thresholds = {
|
||||
warning = var.disk_out_threshold_warning
|
||||
critical = 5
|
||||
}
|
||||
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", "${var.disk_out_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", var.disk_out_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "memory_pressure" {
|
||||
count = "${var.memory_pressure_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Memory pressure"
|
||||
message = "${coalesce(var.memory_pressure_message, var.message)}"
|
||||
count = var.memory_pressure_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Memory pressure"
|
||||
message = coalesce(var.memory_pressure_message, var.message)
|
||||
|
||||
type = "service check"
|
||||
type = "service check"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
"kubernetes_state.node.memory_pressure"${module.filter-tags.service_check}.by("kubernetescluster","node").last(6).count_by_status()
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds = {
|
||||
warning = "${var.memory_pressure_threshold_warning}"
|
||||
critical = 5
|
||||
}
|
||||
thresholds = {
|
||||
warning = var.memory_pressure_threshold_warning
|
||||
critical = 5
|
||||
}
|
||||
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", "${var.memory_pressure_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", var.memory_pressure_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "ready" {
|
||||
count = "${var.ready_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node not ready"
|
||||
message = "${coalesce(var.ready_message, var.message)}"
|
||||
count = var.ready_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node not ready"
|
||||
message = coalesce(var.ready_message, var.message)
|
||||
|
||||
type = "service check"
|
||||
type = "service check"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
"kubernetes_state.node.ready"${module.filter-tags.service_check}.by("kubernetescluster","node").last(6).count_by_status()
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds = {
|
||||
warning = "${var.ready_threshold_warning}"
|
||||
warning = var.ready_threshold_warning
|
||||
critical = 5
|
||||
}
|
||||
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
@ -107,105 +107,105 @@ resource "datadog_monitor" "ready" {
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", "${var.ready_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", var.ready_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "kubelet_ping" {
|
||||
count = "${var.kubelet_ping_enabled == "true" ? 1 : 0}"
|
||||
count = var.kubelet_ping_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Kubelet API does not respond"
|
||||
message = "${coalesce(var.kubelet_ping_message, var.message)}"
|
||||
message = coalesce(var.kubelet_ping_message, var.message)
|
||||
|
||||
type = "service check"
|
||||
|
||||
query = <<EOQ
|
||||
"kubernetes.kubelet.check.ping"${module.filter-tags.service_check}.by("kubernetescluster","name").last(6).count_by_status()
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds = {
|
||||
warning = "${var.kubelet_ping_threshold_warning}"
|
||||
warning = var.kubelet_ping_threshold_warning
|
||||
critical = 5
|
||||
}
|
||||
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", "${var.kubelet_ping_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", var.kubelet_ping_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "kubelet_syncloop" {
|
||||
count = "${var.kubelet_syncloop_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Kubelet sync loop that updates containers does not work"
|
||||
message = "${coalesce(var.kubelet_syncloop_message, var.message)}"
|
||||
count = var.kubelet_syncloop_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Kubelet sync loop that updates containers does not work"
|
||||
message = coalesce(var.kubelet_syncloop_message, var.message)
|
||||
|
||||
type = "service check"
|
||||
|
||||
query = <<EOQ
|
||||
"kubernetes.kubelet.check.syncloop"${module.filter-tags.service_check}.by("kubernetescluster","name").last(6).count_by_status()
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds = {
|
||||
warning = "${var.kubelet_syncloop_threshold_warning}"
|
||||
critical = 5
|
||||
}
|
||||
thresholds = {
|
||||
warning = var.kubelet_syncloop_threshold_warning
|
||||
critical = 5
|
||||
}
|
||||
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", "${var.kubelet_syncloop_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", var.kubelet_syncloop_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "unregister_net_device" {
|
||||
count = "${var.unregister_net_device_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Frequent unregister net device"
|
||||
type = "event alert"
|
||||
message = "${coalesce(var.unregister_net_device_message, var.message)}"
|
||||
count = var.unregister_net_device_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node Frequent unregister net device"
|
||||
type = "event alert"
|
||||
message = coalesce(var.unregister_net_device_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
events('sources:kubernetes priority:all ${module.filter-tags.event_alert} \"UnregisterNetDevice\"').rollup('count').last('${var.unregister_net_device_timeframe}') > ${var.unregister_net_device_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", "${var.unregister_net_device_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", var.unregister_net_device_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "node_unschedulable" {
|
||||
count = "${var.node_unschedulable_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node unschedulable"
|
||||
type = "metric alert"
|
||||
message = "${coalesce(var.node_unschedulable_message, var.message)}"
|
||||
count = var.node_unschedulable_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node unschedulable"
|
||||
type = "metric alert"
|
||||
message = coalesce(var.node_unschedulable_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.node_unschedulable_time_aggregator}(${var.node_unschedulable_timeframe}):
|
||||
sum:kubernetes_state.node.status${module.filter-tags-unschedulable.query_alert} by {kubernetescluster,node}
|
||||
> 0
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
thresholds = {
|
||||
critical = 0
|
||||
}
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
@ -215,69 +215,70 @@ resource "datadog_monitor" "node_unschedulable" {
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", "${var.node_unschedulable_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", var.node_unschedulable_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "volume_space" {
|
||||
count = "${var.volume_space_enabled == "true" ? 1 : 0}"
|
||||
count = var.volume_space_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node volume space usage {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.volume_space_message, var.message)}"
|
||||
message = coalesce(var.volume_space_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
${var.volume_space_time_aggregator}(${var.volume_space_timeframe}):
|
||||
avg:kubernetes.kubelet.volume.stats.used_bytes${module.filter-tags.query_alert} by {kubernetescluster,name,persistentvolumeclaim} /
|
||||
avg:kubernetes.kubelet.volume.stats.capacity_bytes${module.filter-tags.query_alert} by {kubernetescluster,name,persistentvolumeclaim}
|
||||
* 100 > ${var.volume_space_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
critical = "${var.volume_space_threshold_critical}"
|
||||
warning = "${var.volume_space_threshold_warning}"
|
||||
thresholds = {
|
||||
critical = var.volume_space_threshold_critical
|
||||
warning = var.volume_space_threshold_warning
|
||||
}
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", "${var.volume_space_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", var.volume_space_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "volume_inodes" {
|
||||
count = "${var.volume_inodes_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node volume inodes usage {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
count = var.volume_inodes_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Node volume inodes usage {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.volume_inodes_message, var.message)}"
|
||||
message = coalesce(var.volume_inodes_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
${var.volume_inodes_time_aggregator}(${var.volume_inodes_timeframe}):
|
||||
avg:kubernetes.kubelet.volume.stats.inodes_used${module.filter-tags.query_alert} by {kubernetescluster,name,persistentvolumeclaim} /
|
||||
avg:kubernetes.kubelet.volume.stats.inodes${module.filter-tags.query_alert} by {kubernetescluster,name,persistentvolumeclaim}
|
||||
* 100 > ${var.volume_inodes_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
critical = "${var.volume_inodes_threshold_critical}"
|
||||
warning = "${var.volume_inodes_threshold_warning}"
|
||||
}
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", "${var.volume_inodes_extra_tags}"]
|
||||
thresholds = {
|
||||
critical = var.volume_inodes_threshold_critical
|
||||
warning = var.volume_inodes_threshold_warning
|
||||
}
|
||||
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-node", "team:claranet", "created-by:terraform", var.volume_inodes_extra_tags]
|
||||
}
|
||||
|
||||
|
||||
@ -1,49 +1,50 @@
|
||||
output "disk_pressure_id" {
|
||||
description = "id for monitor disk_pressure"
|
||||
value = "${datadog_monitor.disk_pressure.*.id}"
|
||||
value = datadog_monitor.disk_pressure.*.id
|
||||
}
|
||||
|
||||
output "disk_out_id" {
|
||||
description = "id for monitor disk_out"
|
||||
value = "${datadog_monitor.disk_out.*.id}"
|
||||
value = datadog_monitor.disk_out.*.id
|
||||
}
|
||||
|
||||
output "memory_pressure_id" {
|
||||
description = "id for monitor memory_pressure"
|
||||
value = "${datadog_monitor.memory_pressure.*.id}"
|
||||
value = datadog_monitor.memory_pressure.*.id
|
||||
}
|
||||
|
||||
output "ready_id" {
|
||||
description = "id for monitor ready"
|
||||
value = "${datadog_monitor.ready.*.id}"
|
||||
value = datadog_monitor.ready.*.id
|
||||
}
|
||||
|
||||
output "kubelet_ping_id" {
|
||||
description = "id for monitor kubelet_ping"
|
||||
value = "${datadog_monitor.kubelet_ping.*.id}"
|
||||
value = datadog_monitor.kubelet_ping.*.id
|
||||
}
|
||||
|
||||
output "kubelet_syncloop_id" {
|
||||
description = "id for monitor kubelet_syncloop"
|
||||
value = "${datadog_monitor.kubelet_syncloop.*.id}"
|
||||
value = datadog_monitor.kubelet_syncloop.*.id
|
||||
}
|
||||
|
||||
output "unregister_net_device_id" {
|
||||
description = "id for monitor unregister_net_device"
|
||||
value = "${datadog_monitor.unregister_net_device.*.id}"
|
||||
value = datadog_monitor.unregister_net_device.*.id
|
||||
}
|
||||
|
||||
output "node_unschedulable_id" {
|
||||
description = "id for monitor node_unschedulable"
|
||||
value = "${datadog_monitor.node_unschedulable.*.id}"
|
||||
value = datadog_monitor.node_unschedulable.*.id
|
||||
}
|
||||
|
||||
output "volume_space_id" {
|
||||
description = "id for monitor volume_space"
|
||||
value = "${datadog_monitor.volume_space.*.id}"
|
||||
value = datadog_monitor.volume_space.*.id
|
||||
}
|
||||
|
||||
output "volume_inodes_id" {
|
||||
description = "id for monitor volume_inodes"
|
||||
value = "${datadog_monitor.volume_inodes.*.id}"
|
||||
value = datadog_monitor.volume_inodes.*.id
|
||||
}
|
||||
|
||||
|
||||
4
caas/kubernetes/node/versions.tf
Normal file
4
caas/kubernetes/node/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -42,61 +42,61 @@ variable "prefix_slug" {
|
||||
|
||||
variable "pod_phase_status_enabled" {
|
||||
description = "Flag to enable Pod phase status monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "pod_phase_status_extra_tags" {
|
||||
description = "Extra tags for Pod phase status monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "pod_phase_status_message" {
|
||||
description = "Custom message for Pod phase status monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "pod_phase_status_time_aggregator" {
|
||||
description = "Monitor aggregator for Pod phase status [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "pod_phase_status_timeframe" {
|
||||
description = "Monitor timeframe for Pod phase status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "error_enabled" {
|
||||
description = "Flag to enable Pod errors monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "error_extra_tags" {
|
||||
description = "Extra tags for Pod errors monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "error_message" {
|
||||
description = "Custom message for Pod errors monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "error_time_aggregator" {
|
||||
description = "Monitor aggregator for Pod errors [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "sum"
|
||||
}
|
||||
|
||||
variable "error_timeframe" {
|
||||
description = "Monitor timeframe for Pod errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
@ -109,3 +109,4 @@ variable "error_threshold_warning" {
|
||||
default = 0
|
||||
description = "error warning threshold"
|
||||
}
|
||||
|
||||
|
||||
@ -1,31 +1,32 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "kubernetes"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
module "filter-tags-phase" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "kubernetes"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
extra_tags_excluded = ["phase:pending,phase:running,phase:succeeded,phase:unknown"]
|
||||
}
|
||||
|
||||
module "filter-tags-nocontainercreating" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "kubernetes"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
extra_tags_excluded = ["reason:containercreating"]
|
||||
}
|
||||
|
||||
|
||||
@ -1,59 +1,60 @@
|
||||
resource "datadog_monitor" "pod_phase_status" {
|
||||
count = "${var.pod_phase_status_enabled == "true" ? 1 : 0}"
|
||||
count = var.pod_phase_status_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Pod phase status failed"
|
||||
type = "metric alert"
|
||||
message = "${coalesce(var.pod_phase_status_message, var.message)}"
|
||||
message = coalesce(var.pod_phase_status_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
${var.pod_phase_status_time_aggregator}(${var.pod_phase_status_timeframe}):
|
||||
sum:kubernetes_state.pod.status_phase${module.filter-tags-phase.query_alert} by {namespace} > 0
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
thresholds = {
|
||||
critical = 0
|
||||
}
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-pod", "team:claranet", "created-by:terraform", "${var.pod_phase_status_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-pod", "team:claranet", "created-by:terraform", var.pod_phase_status_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "error" {
|
||||
count = "${var.error_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Pod waiting errors"
|
||||
count = var.error_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Pod waiting errors"
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.error_message, var.message)}"
|
||||
message = coalesce(var.error_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
${var.error_time_aggregator}(${var.error_timeframe}):
|
||||
sum:kubernetes_state.container.status_report.count.waiting${module.filter-tags-nocontainercreating.query_alert} by {namespace,pod,reason}.as_count()
|
||||
> ${var.error_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
critical = "${var.error_threshold_critical}"
|
||||
warning = "${var.error_threshold_warning}"
|
||||
}
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-pod", "team:claranet", "created-by:terraform", "${var.error_extra_tags}"]
|
||||
thresholds = {
|
||||
critical = var.error_threshold_critical
|
||||
warning = var.error_threshold_warning
|
||||
}
|
||||
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-pod", "team:claranet", "created-by:terraform", var.error_extra_tags]
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
output "pod_phase_status_id" {
|
||||
description = "id for monitor pod_phase_status"
|
||||
value = "${datadog_monitor.pod_phase_status.*.id}"
|
||||
value = datadog_monitor.pod_phase_status.*.id
|
||||
}
|
||||
|
||||
output "error_id" {
|
||||
description = "id for monitor error"
|
||||
value = "${datadog_monitor.error.*.id}"
|
||||
value = datadog_monitor.error.*.id
|
||||
}
|
||||
|
||||
|
||||
4
caas/kubernetes/pod/versions.tf
Normal file
4
caas/kubernetes/pod/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -42,79 +42,79 @@ variable "prefix_slug" {
|
||||
|
||||
variable "job_enabled" {
|
||||
description = "Flag to enable Job monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "job_extra_tags" {
|
||||
description = "Extra tags for Job monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "job_message" {
|
||||
description = "Custom message for Job monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "job_threshold_warning" {
|
||||
description = "Job monitor (warning threshold)"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 3
|
||||
}
|
||||
|
||||
variable "cronjob_enabled" {
|
||||
description = "Flag to enable Cronjob monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "cronjob_extra_tags" {
|
||||
description = "Extra tags for Cronjob monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "cronjob_message" {
|
||||
description = "Custom message for Cronjob monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cronjob_threshold_warning" {
|
||||
description = "Cronjob monitor (warning threshold)"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 3
|
||||
}
|
||||
|
||||
variable "replica_available_enabled" {
|
||||
description = "Flag to enable Available replica monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "replica_available_extra_tags" {
|
||||
description = "Extra tags for Available replicamonitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "replica_available_message" {
|
||||
description = "Custom message for Available replica monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "replica_available_time_aggregator" {
|
||||
description = "Monitor aggregator for Available replica [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "replica_available_timeframe" {
|
||||
description = "Monitor timeframe for Available replica [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
@ -125,31 +125,31 @@ variable "replica_available_threshold_critical" {
|
||||
|
||||
variable "replica_ready_enabled" {
|
||||
description = "Flag to enable Ready replica monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "replica_ready_extra_tags" {
|
||||
description = "Extra tags for Ready replica monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "replica_ready_message" {
|
||||
description = "Custom message for Ready replica monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "replica_ready_time_aggregator" {
|
||||
description = "Monitor aggregator for Ready replica [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "replica_ready_timeframe" {
|
||||
description = "Monitor timeframe for Ready replica [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -160,31 +160,31 @@ variable "replica_ready_threshold_critical" {
|
||||
|
||||
variable "replica_current_enabled" {
|
||||
description = "Flag to enable Current replica monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "replica_current_extra_tags" {
|
||||
description = "Extra tags for Current replica monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "replica_current_message" {
|
||||
description = "Custom message for Current replica monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "replica_current_time_aggregator" {
|
||||
description = "Monitor aggregator for Current replica [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "replica_current_timeframe" {
|
||||
description = "Monitor timeframe for Current replica [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
@ -192,3 +192,4 @@ variable "replica_current_threshold_critical" {
|
||||
default = 1
|
||||
description = "Current replica critical threshold"
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "kubernetes"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -1,109 +1,109 @@
|
||||
resource "datadog_monitor" "job" {
|
||||
count = "${var.job_enabled == "true" ? 1 : 0}"
|
||||
count = var.job_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes job failed"
|
||||
message = "${coalesce(var.job_message, var.message)}"
|
||||
message = coalesce(var.job_message, var.message)
|
||||
|
||||
type = "service check"
|
||||
|
||||
query = <<EOQ
|
||||
"kubernetes_state.job.complete"${module.filter-tags.service_check}.by("job_name").last(6).count_by_status()
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds = {
|
||||
warning = "${var.job_threshold_warning}"
|
||||
warning = var.job_threshold_warning
|
||||
critical = 5
|
||||
}
|
||||
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-workload", "team:claranet", "created-by:terraform", "${var.job_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-workload", "team:claranet", "created-by:terraform", var.job_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "cronjob" {
|
||||
count = "${var.cronjob_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes cronjob scheduling failed"
|
||||
message = "${coalesce(var.cronjob_message, var.message)}"
|
||||
count = var.cronjob_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes cronjob scheduling failed"
|
||||
message = coalesce(var.cronjob_message, var.message)
|
||||
|
||||
type = "service check"
|
||||
|
||||
query = <<EOQ
|
||||
"kubernetes_state.cronjob.on_schedule_check"${module.filter-tags.service_check}.by("cronjob").last(6).count_by_status()
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds = {
|
||||
warning = "${var.cronjob_threshold_warning}"
|
||||
critical = 5
|
||||
}
|
||||
thresholds = {
|
||||
warning = var.cronjob_threshold_warning
|
||||
critical = 5
|
||||
}
|
||||
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-workload", "team:claranet", "created-by:terraform", "${var.cronjob_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-workload", "team:claranet", "created-by:terraform", var.cronjob_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "replica_available" {
|
||||
count = "${var.replica_available_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Available replicas {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
count = var.replica_available_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Available replicas {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.replica_available_message, var.message)}"
|
||||
message = coalesce(var.replica_available_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.replica_available_time_aggregator}(${var.replica_available_timeframe}):
|
||||
max:kubernetes_state.deployment.replicas_desired${module.filter-tags.query_alert} by {namespace, deployment} -
|
||||
max:kubernetes_state.deployment.replicas_available${module.filter-tags.query_alert} by {namespace, deployment}
|
||||
+ 1 < ${var.replica_available_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
critical = "${var.replica_available_threshold_critical}"
|
||||
}
|
||||
thresholds = {
|
||||
critical = var.replica_available_threshold_critical
|
||||
}
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-workload", "team:claranet", "created-by:terraform", "${var.replica_available_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-workload", "team:claranet", "created-by:terraform", var.replica_available_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "replica_ready" {
|
||||
count = "${var.replica_ready_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Ready replicas {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
count = var.replica_ready_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Ready replicas {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.replica_ready_message, var.message)}"
|
||||
message = coalesce(var.replica_ready_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.replica_available_time_aggregator}(${var.replica_available_timeframe}):
|
||||
max:kubernetes_state.replicaset.replicas_desired${module.filter-tags.query_alert} by {namespace, replicaset} -
|
||||
max:kubernetes_state.replicaset.replicas_ready${module.filter-tags.query_alert} by {namespace, replicaset}
|
||||
+ 1 < ${var.replica_available_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
critical = "${var.replica_ready_threshold_critical}"
|
||||
thresholds = {
|
||||
critical = var.replica_ready_threshold_critical
|
||||
}
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
@ -113,36 +113,37 @@ resource "datadog_monitor" "replica_ready" {
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-workload", "team:claranet", "created-by:terraform", "${var.replica_ready_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-workload", "team:claranet", "created-by:terraform", var.replica_ready_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "replica_current" {
|
||||
count = "${var.replica_current_enabled == "true" ? 1 : 0}"
|
||||
count = var.replica_current_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kubernetes Current replicas {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.replica_current_message, var.message)}"
|
||||
message = coalesce(var.replica_current_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
${var.replica_available_time_aggregator}(${var.replica_available_timeframe}):
|
||||
max:kubernetes_state.replicaset.replicas_desired${module.filter-tags.query_alert} by {namespace, replicaset} -
|
||||
max:kubernetes_state.replicaset.replicas${module.filter-tags.query_alert} by {namespace, replicaset}
|
||||
+ 1 < ${var.replica_available_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
critical = "${var.replica_current_threshold_critical}"
|
||||
thresholds = {
|
||||
critical = var.replica_current_threshold_critical
|
||||
}
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-workload", "team:claranet", "created-by:terraform", "${var.replica_current_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:caas", "provider:kubernetes", "resource:kubernetes-workload", "team:claranet", "created-by:terraform", var.replica_current_extra_tags]
|
||||
}
|
||||
|
||||
|
||||
@ -1,24 +1,25 @@
|
||||
output "job_id" {
|
||||
description = "id for monitor job"
|
||||
value = "${datadog_monitor.job.*.id}"
|
||||
value = datadog_monitor.job.*.id
|
||||
}
|
||||
|
||||
output "cronjob_id" {
|
||||
description = "id for monitor cronjob"
|
||||
value = "${datadog_monitor.cronjob.*.id}"
|
||||
value = datadog_monitor.cronjob.*.id
|
||||
}
|
||||
|
||||
output "replica_available_id" {
|
||||
description = "id for monitor replica_available"
|
||||
value = "${datadog_monitor.replica_available.*.id}"
|
||||
value = datadog_monitor.replica_available.*.id
|
||||
}
|
||||
|
||||
output "replica_ready_id" {
|
||||
description = "id for monitor replica_ready"
|
||||
value = "${datadog_monitor.replica_ready.*.id}"
|
||||
value = datadog_monitor.replica_ready.*.id
|
||||
}
|
||||
|
||||
output "replica_current_id" {
|
||||
description = "id for monitor replica_current"
|
||||
value = "${datadog_monitor.replica_current.*.id}"
|
||||
value = datadog_monitor.replica_current.*.id
|
||||
}
|
||||
|
||||
|
||||
4
caas/kubernetes/workload/versions.tf
Normal file
4
caas/kubernetes/workload/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
variable "environment" {
|
||||
description = "Architecture environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "filter_tags_use_defaults" {
|
||||
@ -43,61 +43,61 @@ variable "prefix_slug" {
|
||||
|
||||
variable "alb_no_healthy_instances_enabled" {
|
||||
description = "Flag to enable ALB no healthy instances monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "alb_no_healthy_instances_extra_tags" {
|
||||
description = "Extra tags for ALB no healthy instances monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "alb_no_healthy_instances_message" {
|
||||
description = "Custom message for ALB no healthy instances monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "alb_no_healthy_instances_time_aggregator" {
|
||||
description = "Monitor aggregator for ALB no healthy instances [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "alb_no_healthy_instances_timeframe" {
|
||||
description = "Monitor timeframe for ALB no healthy instances [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "latency_enabled" {
|
||||
description = "Flag to enable ALB latency monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "latency_extra_tags" {
|
||||
description = "Extra tags for ALB latency monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "latency_message" {
|
||||
description = "Custom message for ALB latency monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "latency_time_aggregator" {
|
||||
description = "Monitor aggregator for ALB latency [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "latency_timeframe" {
|
||||
description = "Monitor timeframe for ALB latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -113,31 +113,31 @@ variable "latency_threshold_warning" {
|
||||
|
||||
variable "httpcode_alb_4xx_enabled" {
|
||||
description = "Flag to enable ALB httpcode 4xx monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "httpcode_alb_4xx_extra_tags" {
|
||||
description = "Extra tags for ALB httpcode 4xx monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "httpcode_alb_4xx_message" {
|
||||
description = "Custom message for ALB httpcode 4xx monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "httpcode_alb_4xx_time_aggregator" {
|
||||
description = "Monitor aggregator for ALB httpcode 4xx [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "httpcode_alb_4xx_timeframe" {
|
||||
description = "Monitor timeframe for ALB httpcode 4xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -153,31 +153,31 @@ variable "httpcode_alb_4xx_threshold_warning" {
|
||||
|
||||
variable "httpcode_target_4xx_enabled" {
|
||||
description = "Flag to enable ALB target httpcode 4xx monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "httpcode_target_4xx_extra_tags" {
|
||||
description = "Extra tags for ALB target httpcode 4xx monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "httpcode_target_4xx_message" {
|
||||
description = "Custom message for ALB target httpcode 4xx monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "httpcode_target_4xx_time_aggregator" {
|
||||
description = "Monitor aggregator for ALB target httpcode 4xx [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "httpcode_target_4xx_timeframe" {
|
||||
description = "Monitor timeframe for ALB target httpcode 4xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -193,31 +193,31 @@ variable "httpcode_target_4xx_threshold_warning" {
|
||||
|
||||
variable "httpcode_alb_5xx_enabled" {
|
||||
description = "Flag to enable ALB httpcode 5xx monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "httpcode_alb_5xx_extra_tags" {
|
||||
description = "Extra tags for ALB httpcode 5xx monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "httpcode_alb_5xx_message" {
|
||||
description = "Custom message for ALB httpcode 5xx monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "httpcode_alb_5xx_time_aggregator" {
|
||||
description = "Monitor aggregator for ALB httpcode 5xx [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "httpcode_alb_5xx_timeframe" {
|
||||
description = "Monitor timeframe for ALB httpcode 5xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -233,31 +233,31 @@ variable "httpcode_alb_5xx_threshold_warning" {
|
||||
|
||||
variable "httpcode_target_5xx_enabled" {
|
||||
description = "Flag to enable ALB target httpcode 5xx monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "httpcode_target_5xx_extra_tags" {
|
||||
description = "Extra tags for ALB target httpcode 5xx monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "httpcode_target_5xx_message" {
|
||||
description = "Custom message for ALB target httpcode 5xx monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "httpcode_target_5xx_time_aggregator" {
|
||||
description = "Monitor aggregator for ALB target httpcode 5xx [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "httpcode_target_5xx_timeframe" {
|
||||
description = "Monitor timeframe for ALB target httpcode 5xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -275,3 +275,4 @@ variable "artificial_requests_count" {
|
||||
default = 5
|
||||
description = "Number of false requests used to mitigate false positive in case of low trafic"
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "aws_alb"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
resource "datadog_monitor" "ALB_no_healthy_instances" {
|
||||
count = "${var.alb_no_healthy_instances_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ALB healthy instances {{#is_alert}}is at 0{{/is_alert}}{{#is_warning}}is at {{value}}%{{/is_warning}}"
|
||||
count = var.alb_no_healthy_instances_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ALB healthy instances {{#is_alert}}is at 0{{/is_alert}}{{#is_warning}}is at {{value}}%%{{/is_warning}}"
|
||||
message = coalesce(var.alb_no_healthy_instances_message, var.message)
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.alb_no_healthy_instances_message, var.message)}"
|
||||
|
||||
query = <<EOQ
|
||||
${var.alb_no_healthy_instances_time_aggregator}(${var.alb_no_healthy_instances_timeframe}): (
|
||||
@ -10,170 +10,164 @@ resource "datadog_monitor" "ALB_no_healthy_instances" {
|
||||
sum:aws.applicationelb.healthy_host_count.maximum${module.filter-tags.query_alert} by {region,loadbalancer} +
|
||||
sum:aws.applicationelb.un_healthy_host_count.maximum${module.filter-tags.query_alert} by {region,loadbalancer} )
|
||||
) * 100 < 1
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
thresholds {
|
||||
thresholds = {
|
||||
critical = 1
|
||||
warning = 100
|
||||
warning = 100
|
||||
}
|
||||
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:alb", "team:claranet", "created-by:terraform", "${var.alb_no_healthy_instances_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:alb", "team:claranet", "created-by:terraform", var.alb_no_healthy_instances_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "ALB_latency" {
|
||||
count = "${var.latency_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ALB latency {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}"
|
||||
count = var.latency_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ALB latency {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}"
|
||||
message = coalesce(var.latency_message, var.message)
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.latency_message, var.message)}"
|
||||
|
||||
query = <<EOQ
|
||||
${var.latency_time_aggregator}(${var.latency_timeframe}):
|
||||
default(avg:aws.applicationelb.target_response_time.average${module.filter-tags.query_alert} by {region,loadbalancer}, 0)
|
||||
> ${var.latency_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
thresholds = {
|
||||
critical = var.latency_threshold_critical
|
||||
warning = var.latency_threshold_warning
|
||||
}
|
||||
|
||||
thresholds {
|
||||
critical = "${var.latency_threshold_critical}"
|
||||
warning = "${var.latency_threshold_warning}"
|
||||
}
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:alb", "team:claranet", "created-by:terraform", "${var.latency_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:alb", "team:claranet", "created-by:terraform", var.latency_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "ALB_httpcode_5xx" {
|
||||
count = "${var.httpcode_alb_5xx_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ALB HTTP code 5xx {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
count = var.httpcode_alb_5xx_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ALB HTTP code 5xx {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.httpcode_alb_5xx_message, var.message)
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.httpcode_alb_5xx_message, var.message)}"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.httpcode_alb_5xx_time_aggregator}(${var.httpcode_alb_5xx_timeframe}):
|
||||
default(avg:aws.applicationelb.httpcode_elb_5xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_rate(), 0) / (
|
||||
default(avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
* 100 > ${var.httpcode_alb_5xx_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
thresholds = {
|
||||
critical = var.httpcode_alb_5xx_threshold_critical
|
||||
warning = var.httpcode_alb_5xx_threshold_warning
|
||||
}
|
||||
|
||||
thresholds {
|
||||
critical = "${var.httpcode_alb_5xx_threshold_critical}"
|
||||
warning = "${var.httpcode_alb_5xx_threshold_warning}"
|
||||
}
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:alb", "team:claranet", "created-by:terraform", "${var.httpcode_alb_5xx_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:alb", "team:claranet", "created-by:terraform", var.httpcode_alb_5xx_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "ALB_httpcode_4xx" {
|
||||
count = "${var.httpcode_alb_4xx_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ALB HTTP code 4xx {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
count = var.httpcode_alb_4xx_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ALB HTTP code 4xx {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.httpcode_alb_4xx_message, var.message)
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.httpcode_alb_4xx_message, var.message)}"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.httpcode_alb_4xx_time_aggregator}(${var.httpcode_alb_4xx_timeframe}):
|
||||
default(avg:aws.applicationelb.httpcode_elb_4xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_rate(), 0) / (
|
||||
default(avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
* 100 > ${var.httpcode_alb_4xx_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
thresholds {
|
||||
critical = "${var.httpcode_alb_4xx_threshold_critical}"
|
||||
warning = "${var.httpcode_alb_4xx_threshold_warning}"
|
||||
thresholds = {
|
||||
critical = var.httpcode_alb_4xx_threshold_critical
|
||||
warning = var.httpcode_alb_4xx_threshold_warning
|
||||
}
|
||||
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:alb", "team:claranet", "created-by:terraform", "${var.httpcode_alb_4xx_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:alb", "team:claranet", "created-by:terraform", var.httpcode_alb_4xx_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "ALB_httpcode_target_5xx" {
|
||||
count = "${var.httpcode_target_5xx_enabled == "true" ? 1 : 0}"
|
||||
count = var.httpcode_target_5xx_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ALB target HTTP code 5xx {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.httpcode_target_5xx_message, var.message)
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.httpcode_target_5xx_message, var.message)}"
|
||||
|
||||
query = <<EOQ
|
||||
${var.httpcode_target_5xx_time_aggregator}(${var.httpcode_target_5xx_timeframe}):
|
||||
default(avg:aws.applicationelb.httpcode_target_5xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_rate(), 0) / (
|
||||
default(avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
* 100 > ${var.httpcode_target_5xx_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
thresholds {
|
||||
critical = "${var.httpcode_target_5xx_threshold_critical}"
|
||||
warning = "${var.httpcode_target_5xx_threshold_warning}"
|
||||
thresholds = {
|
||||
critical = var.httpcode_target_5xx_threshold_critical
|
||||
warning = var.httpcode_target_5xx_threshold_warning
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:alb", "team:claranet", "created-by:terraform", "${var.httpcode_target_5xx_extra_tags}"]
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:alb", "team:claranet", "created-by:terraform", var.httpcode_target_5xx_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "ALB_httpcode_target_4xx" {
|
||||
count = "${var.httpcode_target_4xx_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ALB target HTTP code 4xx {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
count = var.httpcode_target_4xx_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ALB target HTTP code 4xx {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.httpcode_target_4xx_message, var.message)
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.httpcode_target_4xx_message, var.message)}"
|
||||
|
||||
query = <<EOQ
|
||||
${var.httpcode_target_4xx_time_aggregator}(${var.httpcode_target_4xx_timeframe}):
|
||||
default(avg:aws.applicationelb.httpcode_target_4xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_rate(), 0) / (
|
||||
default(avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
* 100 > ${var.httpcode_target_4xx_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
thresholds {
|
||||
critical = "${var.httpcode_target_4xx_threshold_critical}"
|
||||
warning = "${var.httpcode_target_4xx_threshold_warning}"
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:alb", "team:claranet", "created-by:terraform", "${var.httpcode_target_4xx_extra_tags}"]
|
||||
thresholds = {
|
||||
critical = var.httpcode_target_4xx_threshold_critical
|
||||
warning = var.httpcode_target_4xx_threshold_warning
|
||||
}
|
||||
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:alb", "team:claranet", "created-by:terraform", var.httpcode_target_4xx_extra_tags]
|
||||
}
|
||||
|
||||
|
||||
@ -1,29 +1,30 @@
|
||||
output "ALB_no_healthy_instances_id" {
|
||||
description = "id for monitor ALB_no_healthy_instances"
|
||||
value = "${datadog_monitor.ALB_no_healthy_instances.*.id}"
|
||||
value = datadog_monitor.ALB_no_healthy_instances.*.id
|
||||
}
|
||||
|
||||
output "ALB_latency_id" {
|
||||
description = "id for monitor ALB_latency"
|
||||
value = "${datadog_monitor.ALB_latency.*.id}"
|
||||
value = datadog_monitor.ALB_latency.*.id
|
||||
}
|
||||
|
||||
output "ALB_httpcode_5xx_id" {
|
||||
description = "id for monitor ALB_httpcode_5xx"
|
||||
value = "${datadog_monitor.ALB_httpcode_5xx.*.id}"
|
||||
value = datadog_monitor.ALB_httpcode_5xx.*.id
|
||||
}
|
||||
|
||||
output "ALB_httpcode_4xx_id" {
|
||||
description = "id for monitor ALB_httpcode_4xx"
|
||||
value = "${datadog_monitor.ALB_httpcode_4xx.*.id}"
|
||||
value = datadog_monitor.ALB_httpcode_4xx.*.id
|
||||
}
|
||||
|
||||
output "ALB_httpcode_target_5xx_id" {
|
||||
description = "id for monitor ALB_httpcode_target_5xx"
|
||||
value = "${datadog_monitor.ALB_httpcode_target_5xx.*.id}"
|
||||
value = datadog_monitor.ALB_httpcode_target_5xx.*.id
|
||||
}
|
||||
|
||||
output "ALB_httpcode_target_4xx_id" {
|
||||
description = "id for monitor ALB_httpcode_target_4xx"
|
||||
value = "${datadog_monitor.ALB_httpcode_target_4xx.*.id}"
|
||||
value = datadog_monitor.ALB_httpcode_target_4xx.*.id
|
||||
}
|
||||
|
||||
|
||||
4
cloud/aws/alb/versions.tf
Normal file
4
cloud/aws/alb/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,6 +1,6 @@
|
||||
variable "environment" {
|
||||
description = "Environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "filter_tags" {
|
||||
@ -33,31 +33,31 @@ variable "prefix_slug" {
|
||||
|
||||
variable "latency_enabled" {
|
||||
description = "Flag to enable API Gateway latency monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "latency_extra_tags" {
|
||||
description = "Extra tags for API Gateway latency monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "latency_message" {
|
||||
description = "Custom message for API Gateway latency monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "latency_time_aggregator" {
|
||||
description = "Monitor aggregator for API Gateway latency [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "latency_timeframe" {
|
||||
description = "Monitor timeframe for API latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -77,31 +77,31 @@ variable "latency_threshold_warning" {
|
||||
|
||||
variable "http_5xx_requests_enabled" {
|
||||
description = "Flag to enable API Gateway HTTP 5xx requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "http_5xx_requests_extra_tags" {
|
||||
description = "Extra tags for API Gateway HTTP 5xx requests monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "http_5xx_requests_message" {
|
||||
description = "Custom message for API Gateway HTTP 5xx requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "http_5xx_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for API HTTP 5xx requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "http_5xx_requests_timeframe" {
|
||||
description = "Monitor timeframe for API HTTP 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -121,31 +121,31 @@ variable "http_5xx_requests_threshold_warning" {
|
||||
|
||||
variable "http_4xx_requests_enabled" {
|
||||
description = "Flag to enable API Gateway HTTP 4xx requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "http_4xx_requests_extra_tags" {
|
||||
description = "Extra tags for API Gateway HTTP 4xx requests monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "http_4xx_requests_message" {
|
||||
description = "Custom message for API Gateway HTTP 4xx requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "http_4xx_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for API HTTP 4xx requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "http_4xx_requests_timeframe" {
|
||||
description = "Monitor timeframe for API HTTP 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -163,3 +163,4 @@ variable "artificial_requests_count" {
|
||||
default = 5
|
||||
description = "Number of false requests used to mitigate false positives in case of low traffic"
|
||||
}
|
||||
|
||||
|
||||
@ -1,91 +1,92 @@
|
||||
# Monitoring Api Gateway latency
|
||||
resource "datadog_monitor" "API_Gateway_latency" {
|
||||
count = "${var.latency_enabled == "true" ? 1 : 0}"
|
||||
count = var.latency_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] API Gateway latency {{#is_alert}}{{{comparator}}} {{threshold}}ms ({{value}}ms){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}ms ({{value}}ms){{/is_warning}}"
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.latency_message, var.message)}"
|
||||
message = coalesce(var.latency_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
${var.latency_time_aggregator}(${var.latency_timeframe}):
|
||||
default(avg:aws.apigateway.latency{${var.filter_tags}} by {region,apiname,stage}, 0)
|
||||
> ${var.latency_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
thresholds {
|
||||
warning = "${var.latency_threshold_warning}"
|
||||
critical = "${var.latency_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.latency_threshold_warning
|
||||
critical = var.latency_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false # Will NOT notify when no data is received
|
||||
renotify_interval = 0
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:apigateway", "team:claranet", "created-by:terraform", "${var.latency_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:apigateway", "team:claranet", "created-by:terraform", var.latency_extra_tags]
|
||||
}
|
||||
|
||||
# Monitoring API Gateway 5xx errors percent
|
||||
resource "datadog_monitor" "API_http_5xx_errors_count" {
|
||||
count = "${var.http_5xx_requests_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] API Gateway HTTP 5xx errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
count = var.http_5xx_requests_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] API Gateway HTTP 5xx errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.http_5xx_requests_message, var.message)}"
|
||||
message = coalesce(var.http_5xx_requests_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
${var.http_5xx_requests_time_aggregator}(${var.http_5xx_requests_timeframe}):
|
||||
default(avg:aws.apigateway.5xxerror{${var.filter_tags}} by {region,apiname,stage}.as_rate(), 0) / (
|
||||
default(avg:aws.apigateway.count{${var.filter_tags}} by {region,apiname,stage}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
* 100 > ${var.http_5xx_requests_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
thresholds {
|
||||
warning = "${var.http_5xx_requests_threshold_warning}"
|
||||
critical = "${var.http_5xx_requests_threshold_critical}"
|
||||
}
|
||||
thresholds = {
|
||||
warning = var.http_5xx_requests_threshold_warning
|
||||
critical = var.http_5xx_requests_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false # Will NOT notify when no data is received
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:apigateway", "team:claranet", "created-by:terraform", "${var.http_5xx_requests_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:apigateway", "team:claranet", "created-by:terraform", var.http_5xx_requests_extra_tags]
|
||||
}
|
||||
|
||||
# Monitoring API Gateway 4xx errors percent
|
||||
resource "datadog_monitor" "API_http_4xx_errors_count" {
|
||||
count = "${var.http_4xx_requests_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] API Gateway HTTP 4xx errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
count = var.http_4xx_requests_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] API Gateway HTTP 4xx errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.http_4xx_requests_message, var.message)}"
|
||||
message = coalesce(var.http_4xx_requests_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.http_4xx_requests_time_aggregator}(${var.http_4xx_requests_timeframe}):
|
||||
default(avg:aws.apigateway.4xxerror{${var.filter_tags}} by {region,apiname,stage}.as_rate(), 0) / (
|
||||
default(avg:aws.apigateway.count{${var.filter_tags}} by {region,apiname,stage}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
* 100 > ${var.http_4xx_requests_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
thresholds {
|
||||
warning = "${var.http_4xx_requests_threshold_warning}"
|
||||
critical = "${var.http_4xx_requests_threshold_critical}"
|
||||
}
|
||||
|
||||
notify_no_data = false # Will NOT notify when no data is received
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:apigateway", "team:claranet", "created-by:terraform", "${var.http_4xx_requests_extra_tags}"]
|
||||
thresholds = {
|
||||
warning = var.http_4xx_requests_threshold_warning
|
||||
critical = var.http_4xx_requests_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:apigateway", "team:claranet", "created-by:terraform", var.http_4xx_requests_extra_tags]
|
||||
}
|
||||
|
||||
|
||||
@ -1,14 +1,15 @@
|
||||
output "API_Gateway_latency_id" {
|
||||
description = "id for monitor API_Gateway_latency"
|
||||
value = "${datadog_monitor.API_Gateway_latency.*.id}"
|
||||
value = datadog_monitor.API_Gateway_latency.*.id
|
||||
}
|
||||
|
||||
output "API_http_5xx_errors_count_id" {
|
||||
description = "id for monitor API_http_5xx_errors_count"
|
||||
value = "${datadog_monitor.API_http_5xx_errors_count.*.id}"
|
||||
value = datadog_monitor.API_http_5xx_errors_count.*.id
|
||||
}
|
||||
|
||||
output "API_http_4xx_errors_count_id" {
|
||||
description = "id for monitor API_http_4xx_errors_count"
|
||||
value = "${datadog_monitor.API_http_4xx_errors_count.*.id}"
|
||||
value = datadog_monitor.API_http_4xx_errors_count.*.id
|
||||
}
|
||||
|
||||
|
||||
4
cloud/aws/apigateway/versions.tf
Normal file
4
cloud/aws/apigateway/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
# Global Terraform
|
||||
variable "environment" {
|
||||
description = "Infrastructure Environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
# Global DataDog
|
||||
@ -43,222 +43,223 @@ variable "filter_tags_custom_excluded" {
|
||||
|
||||
variable "eviction_enabled" {
|
||||
description = "Flag to enable Elasticache eviction monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "eviction_extra_tags" {
|
||||
description = "Extra tags for Elasticache eviction monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "eviction_message" {
|
||||
description = "Custom message for Elasticache eviction monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "eviction_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
variable "eviction_threshold_warning" {
|
||||
description = "Elasticache eviction warning threshold (sum of evictions over the timeframe)"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "eviction_threshold_critical" {
|
||||
description = "Elasticache eviction critical threshold (sum of evictions over the timeframe)"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 30
|
||||
}
|
||||
|
||||
variable "max_connection_enabled" {
|
||||
description = "Flag to enable Elasticache max connection monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "max_connection_extra_tags" {
|
||||
description = "Extra tags for Elasticache max connection monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "max_connection_message" {
|
||||
description = "Custom message for Elasticache max connection monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "max_connection_time_aggregator" {
|
||||
description = "Monitor aggregator for Elasticache max connection [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "max_connection_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "no_connection_enabled" {
|
||||
description = "Flag to enable Elasticache no connection monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "no_connection_extra_tags" {
|
||||
description = "Extra tags for Elasticache no connection monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "no_connection_message" {
|
||||
description = "Custom message for Elasticache no connection monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "no_connection_time_aggregator" {
|
||||
description = "Monitor aggregator for Elasticache no connection [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "no_connection_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "swap_enabled" {
|
||||
description = "Flag to enable Elasticache swap monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "swap_extra_tags" {
|
||||
description = "Extra tags for Elasticache swap monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "swap_message" {
|
||||
description = "Custom message for Elasticache swap monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "swap_time_aggregator" {
|
||||
description = "Monitor aggregator for Elasticache memcached swap [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "swap_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "swap_threshold_warning" {
|
||||
description = "Elasticache swap warning threshold in bytes"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "swap_threshold_critical" {
|
||||
description = "Elasticache swap critical threshold in bytes"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 50000000
|
||||
}
|
||||
|
||||
variable "free_memory_enabled" {
|
||||
description = "Flag to enable Elasticache free memory monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "free_memory_extra_tags" {
|
||||
description = "Extra tags for Elasticache free memory monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "free_memory_message" {
|
||||
description = "Custom message for Elasticache free memory monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "free_memory_condition_timeframe" {
|
||||
description = "Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
variable "free_memory_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
variable "free_memory_threshold_warning" {
|
||||
description = "Elasticache free memory warning threshold in percentage"
|
||||
type = "string"
|
||||
type = string
|
||||
default = -50
|
||||
}
|
||||
|
||||
variable "free_memory_threshold_critical" {
|
||||
description = "Elasticache free memory critical threshold in percentage"
|
||||
type = "string"
|
||||
type = string
|
||||
default = -70
|
||||
}
|
||||
|
||||
variable "eviction_growing_enabled" {
|
||||
description = "Flag to enable Elasticache eviction growing monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "eviction_growing_extra_tags" {
|
||||
description = "Extra tags for Elasticache eviction growing monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "eviction_growing_message" {
|
||||
description = "Custom message for Elasticache eviction growing monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "eviction_growing_condition_timeframe" {
|
||||
description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "eviction_growing_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "eviction_growing_threshold_warning" {
|
||||
description = "Elasticache eviction growing warning threshold in percentage"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 10
|
||||
}
|
||||
|
||||
variable "eviction_growing_threshold_critical" {
|
||||
description = "Elasticache eviction growing critical threshold in percentage"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 30
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "aws_elasticache"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -1,175 +1,170 @@
|
||||
resource "datadog_monitor" "elasticache_eviction" {
|
||||
count = "${var.eviction_enabled == "true" ? 1 : 0}"
|
||||
count = var.eviction_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache eviction {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}"
|
||||
message = "${coalesce(var.eviction_message, var.message)}"
|
||||
|
||||
message = coalesce(var.eviction_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
sum(${var.eviction_timeframe}): (
|
||||
avg:aws.elasticache.evictions${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
) > ${var.eviction_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.eviction_threshold_warning}"
|
||||
critical = "${var.eviction_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.eviction_threshold_warning
|
||||
critical = var.eviction_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.eviction_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", var.eviction_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "elasticache_max_connection" {
|
||||
count = "${var.max_connection_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache max connections reached {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}"
|
||||
message = "${coalesce(var.max_connection_message, var.message)}"
|
||||
|
||||
count = var.max_connection_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache max connections reached {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}"
|
||||
message = coalesce(var.max_connection_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
${var.max_connection_time_aggregator}(${var.max_connection_timeframe}): (
|
||||
avg:aws.elasticache.curr_connections${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
) >= 65000
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.max_connection_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", var.max_connection_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "elasticache_no_connection" {
|
||||
count = "${var.no_connection_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache connections {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}"
|
||||
message = "${coalesce(var.no_connection_message, var.message)}"
|
||||
count = var.no_connection_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache connections {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}"
|
||||
message = coalesce(var.no_connection_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.no_connection_time_aggregator}(${var.no_connection_timeframe}): (
|
||||
avg:aws.elasticache.curr_connections${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
) <= 0
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.no_connection_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", var.no_connection_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "elasticache_swap" {
|
||||
count = "${var.swap_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache swap {{#is_alert}}{{{comparator}}} {{threshold}}MB ({{value}}MB){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}MB ({{value}}MB){{/is_warning}}"
|
||||
message = "${coalesce(var.swap_message, var.message)}"
|
||||
count = var.swap_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache swap {{#is_alert}}{{{comparator}}} {{threshold}}MB ({{value}}MB){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}MB ({{value}}MB){{/is_warning}}"
|
||||
message = coalesce(var.swap_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.swap_time_aggregator}(${var.swap_timeframe}): (
|
||||
avg:aws.elasticache.swap_usage${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
) > ${var.swap_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.swap_threshold_warning}"
|
||||
critical = "${var.swap_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.swap_threshold_warning
|
||||
critical = var.swap_threshold_critical
|
||||
}
|
||||
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.swap_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", var.swap_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "elasticache_free_memory" {
|
||||
count = "${var.free_memory_enabled == "true" ? 1 : 0}"
|
||||
count = var.free_memory_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.free_memory_message, var.message)}"
|
||||
|
||||
message = coalesce(var.free_memory_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
pct_change(avg(${var.free_memory_timeframe}),${var.free_memory_condition_timeframe}):
|
||||
avg:aws.elasticache.freeable_memory${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
< ${var.free_memory_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.free_memory_threshold_warning}"
|
||||
critical = "${var.free_memory_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.free_memory_threshold_warning
|
||||
critical = var.free_memory_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.free_memory_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", var.free_memory_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "elasticache_eviction_growing" {
|
||||
count = "${var.eviction_growing_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache evictions is growing {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}"
|
||||
message = "${coalesce(var.eviction_growing_message, var.message)}"
|
||||
|
||||
count = var.eviction_growing_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache evictions is growing {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}"
|
||||
message = coalesce(var.eviction_growing_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
pct_change(avg(${var.eviction_growing_timeframe}),${var.eviction_growing_condition_timeframe}):
|
||||
avg:aws.elasticache.evictions${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
> ${var.eviction_growing_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.eviction_growing_threshold_warning}"
|
||||
critical = "${var.eviction_growing_threshold_critical}"
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.eviction_growing_extra_tags}"]
|
||||
thresholds = {
|
||||
warning = var.eviction_growing_threshold_warning
|
||||
critical = var.eviction_growing_threshold_critical
|
||||
}
|
||||
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", var.eviction_growing_extra_tags]
|
||||
}
|
||||
|
||||
|
||||
@ -1,29 +1,30 @@
|
||||
output "elasticache_eviction_id" {
|
||||
description = "id for monitor elasticache_eviction"
|
||||
value = "${datadog_monitor.elasticache_eviction.*.id}"
|
||||
value = datadog_monitor.elasticache_eviction.*.id
|
||||
}
|
||||
|
||||
output "elasticache_max_connection_id" {
|
||||
description = "id for monitor elasticache_max_connection"
|
||||
value = "${datadog_monitor.elasticache_max_connection.*.id}"
|
||||
value = datadog_monitor.elasticache_max_connection.*.id
|
||||
}
|
||||
|
||||
output "elasticache_no_connection_id" {
|
||||
description = "id for monitor elasticache_no_connection"
|
||||
value = "${datadog_monitor.elasticache_no_connection.*.id}"
|
||||
value = datadog_monitor.elasticache_no_connection.*.id
|
||||
}
|
||||
|
||||
output "elasticache_swap_id" {
|
||||
description = "id for monitor elasticache_swap"
|
||||
value = "${datadog_monitor.elasticache_swap.*.id}"
|
||||
value = datadog_monitor.elasticache_swap.*.id
|
||||
}
|
||||
|
||||
output "elasticache_free_memory_id" {
|
||||
description = "id for monitor elasticache_free_memory"
|
||||
value = "${datadog_monitor.elasticache_free_memory.*.id}"
|
||||
value = datadog_monitor.elasticache_free_memory.*.id
|
||||
}
|
||||
|
||||
output "elasticache_eviction_growing_id" {
|
||||
description = "id for monitor elasticache_eviction_growing"
|
||||
value = "${datadog_monitor.elasticache_eviction_growing.*.id}"
|
||||
value = datadog_monitor.elasticache_eviction_growing.*.id
|
||||
}
|
||||
|
||||
|
||||
4
cloud/aws/elasticache/common/versions.tf
Normal file
4
cloud/aws/elasticache/common/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
# Global Terraform
|
||||
variable "environment" {
|
||||
description = "Infrastructure Environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
# Global DataDog
|
||||
@ -43,84 +43,85 @@ variable "filter_tags_custom_excluded" {
|
||||
|
||||
variable "get_hits_enabled" {
|
||||
description = "Flag to enable Elasticache memcached get hits monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "get_hits_extra_tags" {
|
||||
description = "Extra tags for Elasticache memcached get hits monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "get_hits_message" {
|
||||
description = "Custom message for Elasticache memcached get hits monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "get_hits_time_aggregator" {
|
||||
description = "Monitor aggregator for Elasticache memcached get hits [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "get_hits_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
variable "get_hits_threshold_warning" {
|
||||
description = "Elasticache memcached get hits warning threshold in percentage"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 80
|
||||
}
|
||||
|
||||
variable "get_hits_threshold_critical" {
|
||||
description = "Elasticache memcached get hits critical threshold in percentage"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 60
|
||||
}
|
||||
|
||||
variable "cpu_high_enabled" {
|
||||
description = "Flag to enable Elasticache memcached cpu high monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "cpu_high_extra_tags" {
|
||||
description = "Extra tags for Elasticache memcached cpu high monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "cpu_high_message" {
|
||||
description = "Custom message for Elasticache memcached cpu high monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cpu_high_time_aggregator" {
|
||||
description = "Monitor aggregator for Elasticache memcached cpu high [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "cpu_high_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
variable "cpu_high_threshold_warning" {
|
||||
description = "Elasticache memcached cpu high warning threshold in percentage"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 75
|
||||
}
|
||||
|
||||
variable "cpu_high_threshold_critical" {
|
||||
description = "Elasticache memcached cpu high critical threshold in percentage"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 90
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "aws_elasticache"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
resource "datadog_monitor" "memcached_get_hits" {
|
||||
count = "${var.get_hits_enabled == "true" ? 1 : 0}"
|
||||
count = var.get_hits_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache memcached cache hit ratio {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.get_hits_message, var.message)}"
|
||||
message = coalesce(var.get_hits_message, var.message)
|
||||
|
||||
type = "query alert"
|
||||
|
||||
@ -11,30 +11,30 @@ resource "datadog_monitor" "memcached_get_hits" {
|
||||
default(avg:aws.elasticache.get_hits${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_rate(), 0) +
|
||||
default(avg:aws.elasticache.get_misses${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_rate(), 0))
|
||||
) * 100 < ${var.get_hits_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.get_hits_threshold_warning}"
|
||||
critical = "${var.get_hits_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.get_hits_threshold_warning
|
||||
critical = var.get_hits_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-memcached", "team:claranet", "created-by:terraform", "engine:memcached", "${var.get_hits_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-memcached", "team:claranet", "created-by:terraform", "engine:memcached", var.get_hits_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "memcached_cpu_high" {
|
||||
count = "${var.cpu_high_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache memcached CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.cpu_high_message, var.message)}"
|
||||
count = var.cpu_high_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache memcached CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.cpu_high_message, var.message)
|
||||
|
||||
type = "query alert"
|
||||
|
||||
@ -42,22 +42,23 @@ resource "datadog_monitor" "memcached_cpu_high" {
|
||||
${var.cpu_high_time_aggregator}(${var.cpu_high_timeframe}): (
|
||||
avg:aws.elasticache.cpuutilization${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
) > ${var.cpu_high_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.cpu_high_threshold_warning}"
|
||||
critical = "${var.cpu_high_threshold_critical}"
|
||||
}
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-memcached", "team:claranet", "created-by:terraform", "engine:memcached", "${var.cpu_high_extra_tags}"]
|
||||
thresholds = {
|
||||
warning = var.cpu_high_threshold_warning
|
||||
critical = var.cpu_high_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-memcached", "team:claranet", "created-by:terraform", "engine:memcached", var.cpu_high_extra_tags]
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
output "memcached_get_hits_id" {
|
||||
description = "id for monitor memcached_get_hits"
|
||||
value = "${datadog_monitor.memcached_get_hits.*.id}"
|
||||
value = datadog_monitor.memcached_get_hits.*.id
|
||||
}
|
||||
|
||||
output "memcached_cpu_high_id" {
|
||||
description = "id for monitor memcached_cpu_high"
|
||||
value = "${datadog_monitor.memcached_cpu_high.*.id}"
|
||||
value = datadog_monitor.memcached_cpu_high.*.id
|
||||
}
|
||||
|
||||
|
||||
4
cloud/aws/elasticache/memcached/versions.tf
Normal file
4
cloud/aws/elasticache/memcached/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
# Global Terraform
|
||||
variable "environment" {
|
||||
description = "Infrastructure Environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
# Global DataDog
|
||||
@ -43,150 +43,151 @@ variable "filter_tags_custom_excluded" {
|
||||
|
||||
variable "cache_hits_enabled" {
|
||||
description = "Flag to enable Elasticache redis cache hits monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "cache_hits_extra_tags" {
|
||||
description = "Extra tags for Elasticache redis cache hits monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "cache_hits_message" {
|
||||
description = "Custom message for Elasticache redis cache hits monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cache_hits_time_aggregator" {
|
||||
description = "Monitor aggregator for Elasticache redis cache hits [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "cache_hits_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache redis cache hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
variable "cache_hits_threshold_warning" {
|
||||
description = "Elasticache redis cache hits warning threshold in percentage"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 80
|
||||
}
|
||||
|
||||
variable "cache_hits_threshold_critical" {
|
||||
description = "Elasticache redis cache hits critical threshold in percentage"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 60
|
||||
}
|
||||
|
||||
variable "cpu_high_enabled" {
|
||||
description = "Flag to enable Elasticache redis cpu high monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "cpu_high_extra_tags" {
|
||||
description = "Extra tags for Elasticache redis cpu high monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "cpu_high_message" {
|
||||
description = "Custom message for Elasticache redis cpu high monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cpu_high_time_aggregator" {
|
||||
description = "Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "cpu_high_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
variable "cpu_high_threshold_warning" {
|
||||
description = "Elasticache redis cpu high warning threshold in percentage"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 75
|
||||
}
|
||||
|
||||
variable "cpu_high_threshold_critical" {
|
||||
description = "Elasticache redis cpu high critical threshold in percentage"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 90
|
||||
}
|
||||
|
||||
variable "replication_lag_enabled" {
|
||||
description = "Flag to enable Elasticache redis replication lag monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "replication_lag_extra_tags" {
|
||||
description = "Extra tags for Elasticache redis replication lag monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "replication_lag_message" {
|
||||
description = "Custom message for Elasticache redis replication lag monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "replication_lag_time_aggregator" {
|
||||
description = "Monitor aggregator for Elasticache redis replication lag [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "replication_lag_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_10m"
|
||||
}
|
||||
|
||||
variable "replication_lag_threshold_warning" {
|
||||
description = "Elasticache redis replication lag warning threshold in seconds"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 90
|
||||
}
|
||||
|
||||
variable "replication_lag_threshold_critical" {
|
||||
description = "Elasticache redis replication lag critical threshold in seconds"
|
||||
type = "string"
|
||||
type = string
|
||||
default = 180
|
||||
}
|
||||
|
||||
variable "commands_enabled" {
|
||||
description = "Flag to enable Elasticache redis commands monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "commands_extra_tags" {
|
||||
description = "Extra tags for Elasticache redis commands monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "commands_message" {
|
||||
description = "Custom message for Elasticache redis commands monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "commands_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "aws_elasticache"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
resource "datadog_monitor" "redis_cache_hits" {
|
||||
count = "${var.cache_hits_enabled == "true" ? 1 : 0}"
|
||||
count = var.cache_hits_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache redis cache hit ratio {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.cache_hits_message, var.message)}"
|
||||
message = coalesce(var.cache_hits_message, var.message)
|
||||
|
||||
type = "query alert"
|
||||
|
||||
@ -11,30 +11,30 @@ resource "datadog_monitor" "redis_cache_hits" {
|
||||
avg:aws.elasticache.cache_hits${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_rate() +
|
||||
avg:aws.elasticache.cache_misses${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_rate())
|
||||
* 100, 100) < ${var.cache_hits_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.cache_hits_threshold_warning}"
|
||||
critical = "${var.cache_hits_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.cache_hits_threshold_warning
|
||||
critical = var.cache_hits_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", "${var.cache_hits_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", var.cache_hits_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "redis_cpu_high" {
|
||||
count = "${var.cpu_high_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache redis CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.cpu_high_message, var.message)}"
|
||||
count = var.cpu_high_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache redis CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.cpu_high_message, var.message)
|
||||
|
||||
type = "query alert"
|
||||
|
||||
@ -42,75 +42,76 @@ resource "datadog_monitor" "redis_cpu_high" {
|
||||
${var.cpu_high_time_aggregator}(${var.cpu_high_timeframe}): (
|
||||
avg:aws.elasticache.engine_cpuutilization${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
) > ${var.cpu_high_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
notify_no_data = true
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", "${var.cpu_high_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", var.cpu_high_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "redis_replication_lag" {
|
||||
count = "${var.replication_lag_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache redis replication lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}"
|
||||
message = "${coalesce(var.replication_lag_message, var.message)}"
|
||||
count = var.replication_lag_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache redis replication lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}"
|
||||
message = coalesce(var.replication_lag_message, var.message)
|
||||
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.replication_lag_time_aggregator}(${var.replication_lag_timeframe}): (
|
||||
avg:aws.elasticache.replication_lag${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
) > ${var.replication_lag_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.replication_lag_threshold_warning}"
|
||||
critical = "${var.replication_lag_threshold_critical}"
|
||||
}
|
||||
thresholds = {
|
||||
warning = var.replication_lag_threshold_warning
|
||||
critical = var.replication_lag_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
notify_no_data = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", "${var.replication_lag_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", var.replication_lag_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "redis_commands" {
|
||||
count = "${var.commands_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache redis is receiving no commands"
|
||||
message = "${coalesce(var.commands_message, var.message)}"
|
||||
count = var.commands_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticache redis is receiving no commands"
|
||||
message = coalesce(var.commands_message, var.message)
|
||||
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
sum(${var.commands_timeframe}): (
|
||||
avg:aws.elasticache.get_type_cmds${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_count() +
|
||||
avg:aws.elasticache.set_type_cmds${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_count()
|
||||
) <= 0
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", "${var.commands_extra_tags}"]
|
||||
# Terraform 0.12 does not flatten a list placed inside a list literal, so the
# caller-supplied extra tags are appended with concat() to keep a flat list of strings.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis"], var.commands_extra_tags)
|
||||
}
|
||||
|
||||
|
||||
@ -1,19 +1,20 @@
|
||||
output "redis_cache_hits_id" {
|
||||
description = "id for monitor redis_cache_hits"
|
||||
value = "${datadog_monitor.redis_cache_hits.*.id}"
|
||||
value = datadog_monitor.redis_cache_hits.*.id
|
||||
}
|
||||
|
||||
output "redis_cpu_high_id" {
|
||||
description = "id for monitor redis_cpu_high"
|
||||
value = "${datadog_monitor.redis_cpu_high.*.id}"
|
||||
value = datadog_monitor.redis_cpu_high.*.id
|
||||
}
|
||||
|
||||
output "redis_replication_lag_id" {
|
||||
description = "id for monitor redis_replication_lag"
|
||||
value = "${datadog_monitor.redis_replication_lag.*.id}"
|
||||
value = datadog_monitor.redis_replication_lag.*.id
|
||||
}
|
||||
|
||||
output "redis_commands_id" {
|
||||
description = "id for monitor redis_commands"
|
||||
value = "${datadog_monitor.redis_commands.*.id}"
|
||||
value = datadog_monitor.redis_commands.*.id
|
||||
}
|
||||
|
||||
|
||||
4
cloud/aws/elasticache/redis/versions.tf
Normal file
4
cloud/aws/elasticache/redis/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
# Global Terraform
|
||||
variable "environment" {
|
||||
description = "Architecture Environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
# Global DataDog
|
||||
@ -43,25 +43,25 @@ variable "filter_tags_custom_excluded" {
|
||||
|
||||
variable "es_cluster_status_enabled" {
|
||||
description = "Flag to enable ES cluster status monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "es_cluster_status_extra_tags" {
|
||||
description = "Extra tags for ES cluster status monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "es_cluster_status_message" {
|
||||
description = "Custom message for ES cluster status monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "es_cluster_status_timeframe" {
|
||||
description = "Monitor timeframe for ES cluster status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_30m"
|
||||
}
|
||||
|
||||
@ -71,31 +71,31 @@ variable "es_cluster_volume_size" {
|
||||
|
||||
variable "diskspace_enabled" {
|
||||
description = "Flag to enable ES cluster diskspace monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "diskspace_extra_tags" {
|
||||
description = "Extra tags for ES cluster diskspace monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "diskspace_message" {
|
||||
description = "Custom message for ES cluster diskspace monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "diskspace_time_aggregator" {
|
||||
description = "Monitor aggregator for ES cluster diskspace [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "diskspace_timeframe" {
|
||||
description = "Monitor timeframe for ES cluster diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
@ -111,31 +111,31 @@ variable "diskspace_threshold_critical" {
|
||||
|
||||
variable "cpu_enabled" {
|
||||
description = "Flag to enable ES cluster cpu monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "cpu_extra_tags" {
|
||||
description = "Extra tags for ES cluster cpu monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "cpu_message" {
|
||||
description = "Custom message for ES cluster cpu monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cpu_time_aggregator" {
|
||||
description = "Monitor aggregator for ES cluster cpu [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "cpu_timeframe" {
|
||||
description = "Monitor timeframe for ES cluster cpu [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
@ -148,3 +148,4 @@ variable "cpu_threshold_critical" {
|
||||
description = "CPU usage in percent (critical threshold)"
|
||||
default = "90"
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "aws_elasticsearch"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -4,10 +4,9 @@
|
||||
- If aws.es.cluster_statusyellow is 1 --> 1 < query value (=1.1) < 2 : warning
|
||||
Workaround : in the query, we add "0.1" to the result and we use the comparator ">=". No alert was triggered without that. */
|
||||
resource "datadog_monitor" "es_cluster_status" {
|
||||
count = "${var.es_cluster_status_enabled == "true" ? 1 : 0}"
|
||||
count = var.es_cluster_status_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ElasticSearch cluster status is not green"
|
||||
message = "${coalesce(var.es_cluster_status_message, var.message)}"
|
||||
|
||||
message = coalesce(var.es_cluster_status_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
@ -15,32 +14,31 @@ resource "datadog_monitor" "es_cluster_status" {
|
||||
avg:aws.es.cluster_statusred${module.filter-tags.query_alert} by {region,name} * 2 +
|
||||
(avg:aws.es.cluster_statusyellow${module.filter-tags.query_alert} by {region,name} + 0.1)
|
||||
) >= 2
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = 1
|
||||
thresholds = {
|
||||
warning = 1
|
||||
critical = 2
|
||||
}
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = true
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticsearch", "team:claranet", "created-by:terraform", "${var.es_cluster_status_extra_tags}"]
|
||||
# Terraform 0.12 does not flatten a list placed inside a list literal, so the
# caller-supplied extra tags are appended with concat() to keep a flat list of strings.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticsearch", "team:claranet", "created-by:terraform"], var.es_cluster_status_extra_tags)
|
||||
}
|
||||
|
||||
### Elasticsearch cluster free storage space monitor ###
|
||||
resource "datadog_monitor" "es_free_space_low" {
|
||||
count = "${var.diskspace_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ElasticSearch cluster free storage space {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.diskspace_message, var.message)}"
|
||||
|
||||
count = var.diskspace_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ElasticSearch cluster free storage space {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.diskspace_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
@ -48,54 +46,55 @@ resource "datadog_monitor" "es_free_space_low" {
|
||||
avg:aws.es.free_storage_space${module.filter-tags.query_alert} by {region,name} /
|
||||
(${var.es_cluster_volume_size}*1000) * 100
|
||||
) < ${var.diskspace_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.diskspace_threshold_warning}"
|
||||
critical = "${var.diskspace_threshold_critical}"
|
||||
}
|
||||
thresholds = {
|
||||
warning = var.diskspace_threshold_warning
|
||||
critical = var.diskspace_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
notify_no_data = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticsearch", "team:claranet", "created-by:terraform", "${var.diskspace_extra_tags}"]
|
||||
# Terraform 0.12 does not flatten a list placed inside a list literal, so the
# caller-supplied extra tags are appended with concat() to keep a flat list of strings.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticsearch", "team:claranet", "created-by:terraform"], var.diskspace_extra_tags)
|
||||
}
|
||||
|
||||
### Elasticsearch cluster CPU monitor ###
|
||||
resource "datadog_monitor" "es_cpu_90_15min" {
|
||||
count = "${var.cpu_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ElasticSearch cluster CPU high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.cpu_message, var.message)}"
|
||||
count = var.cpu_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ElasticSearch cluster CPU high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.cpu_message, var.message)
|
||||
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.cpu_time_aggregator}(${var.cpu_timeframe}): (
|
||||
avg:aws.es.cpuutilization${module.filter-tags.query_alert} by {region,name}
|
||||
) > ${var.cpu_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.cpu_threshold_warning}"
|
||||
critical = "${var.cpu_threshold_critical}"
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticsearch", "team:claranet", "created-by:terraform", "${var.cpu_extra_tags}"]
|
||||
thresholds = {
|
||||
warning = var.cpu_threshold_warning
|
||||
critical = var.cpu_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
# Terraform 0.12 does not flatten a list placed inside a list literal, so the
# caller-supplied extra tags are appended with concat() to keep a flat list of strings.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticsearch", "team:claranet", "created-by:terraform"], var.cpu_extra_tags)
|
||||
}
|
||||
|
||||
|
||||
@ -1,14 +1,15 @@
|
||||
output "es_cluster_status_id" {
|
||||
description = "id for monitor es_cluster_status"
|
||||
value = "${datadog_monitor.es_cluster_status.*.id}"
|
||||
value = datadog_monitor.es_cluster_status.*.id
|
||||
}
|
||||
|
||||
output "es_free_space_low_id" {
|
||||
description = "id for monitor es_free_space_low"
|
||||
value = "${datadog_monitor.es_free_space_low.*.id}"
|
||||
value = datadog_monitor.es_free_space_low.*.id
|
||||
}
|
||||
|
||||
output "es_cpu_90_15min_id" {
|
||||
description = "id for monitor es_cpu_90_15min"
|
||||
value = "${datadog_monitor.es_cpu_90_15min.*.id}"
|
||||
value = datadog_monitor.es_cpu_90_15min.*.id
|
||||
}
|
||||
|
||||
|
||||
4
cloud/aws/elasticsearch/versions.tf
Normal file
4
cloud/aws/elasticsearch/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
# Global Terraform
|
||||
variable "environment" {
|
||||
description = "Architecture Environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
# Global DataDog
|
||||
@ -43,55 +43,55 @@ variable "filter_tags_custom_excluded" {
|
||||
|
||||
variable "elb_no_healthy_instance_enabled" {
|
||||
description = "Flag to enable ELB no healthy instance monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "elb_no_healthy_instance_extra_tags" {
|
||||
description = "Extra tags for ELB no healthy instance monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "elb_no_healthy_instance_message" {
|
||||
description = "Custom message for ELB no healthy instance monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "elb_no_healthy_instance_time_aggregator" {
|
||||
description = "Monitor aggregator for ELB no healthy instance [available values: min or max]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "elb_no_healthy_instance_timeframe" {
|
||||
description = "Monitor timeframe for ELB no healthy instance [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "elb_4xx_enabled" {
|
||||
description = "Flag to enable ELB 4xx errors monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "elb_4xx_extra_tags" {
|
||||
description = "Extra tags for ELB 4xx errors monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "elb_4xx_message" {
|
||||
description = "Custom message for ELB 4xx errors monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "elb_4xx_timeframe" {
|
||||
description = "Monitor timeframe for ELB 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -107,25 +107,25 @@ variable "elb_4xx_threshold_critical" {
|
||||
|
||||
variable "elb_5xx_enabled" {
|
||||
description = "Flag to enable ELB 5xx errors monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "elb_5xx_extra_tags" {
|
||||
description = "Extra tags for ELB 5xx errors monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "elb_5xx_message" {
|
||||
description = "Custom message for ELB 5xx errors monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "elb_5xx_timeframe" {
|
||||
description = "Monitor timeframe for ELB 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -141,25 +141,25 @@ variable "elb_5xx_threshold_critical" {
|
||||
|
||||
variable "elb_backend_4xx_enabled" {
|
||||
description = "Flag to enable ELB backend 4xx errors monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "elb_backend_4xx_extra_tags" {
|
||||
description = "Extra tags for ELB backend 4xx errors monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "elb_backend_4xx_message" {
|
||||
description = "Custom message for ELB backend 4xx errors monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "elb_backend_4xx_timeframe" {
|
||||
description = "Monitor timeframe for ELB backend 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -175,25 +175,25 @@ variable "elb_backend_4xx_threshold_critical" {
|
||||
|
||||
variable "elb_backend_5xx_enabled" {
|
||||
description = "Flag to enable ELB backend 5xx errors monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "elb_backend_5xx_extra_tags" {
|
||||
description = "Extra tags for ELB backend 5xx errors monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "elb_backend_5xx_message" {
|
||||
description = "Custom message for ELB backend 5xx errors monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "elb_backend_5xx_timeframe" {
|
||||
description = "Monitor timeframe for ELB backend 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -209,31 +209,31 @@ variable "elb_backend_5xx_threshold_critical" {
|
||||
|
||||
variable "elb_backend_latency_enabled" {
|
||||
description = "Flag to enable ELB backend latency monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "elb_backend_latency_extra_tags" {
|
||||
description = "Extra tags for ELB backend latency monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "elb_backend_latency_message" {
|
||||
description = "Custom message for ELB backend latency monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "elb_backend_latency_time_aggregator" {
|
||||
description = "Monitor aggregator for ELB backend latency [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "elb_backend_latency_timeframe" {
|
||||
description = "Monitor timeframe for ELB backend latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -251,3 +251,4 @@ variable "artificial_requests_count" {
|
||||
default = 5
|
||||
description = "Number of false requests used to mitigate false positive in case of low traffic"
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "aws_elb"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -1,7 +1,8 @@
|
||||
resource "datadog_monitor" "ELB_no_healthy_instances" {
|
||||
count = "${var.elb_no_healthy_instance_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ELB healthy instances {{#is_alert}}is at 0{{/is_alert}}{{#is_warning}}is at {{value}}%{{/is_warning}}"
|
||||
message = "${coalesce(var.elb_no_healthy_instance_message, var.message)}"
|
||||
count = var.elb_no_healthy_instance_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ELB healthy instances {{#is_alert}}is at 0{{/is_alert}}{{#is_warning}}is at {{value}}%%{{/is_warning}}"
|
||||
message = coalesce(var.elb_no_healthy_instance_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
${var.elb_no_healthy_instance_time_aggregator}(${var.elb_no_healthy_instance_timeframe}): (
|
||||
@ -9,183 +10,177 @@ resource "datadog_monitor" "ELB_no_healthy_instances" {
|
||||
sum:aws.elb.healthy_host_count.maximum${module.filter-tags.query_alert} by {region,loadbalancername} +
|
||||
sum:aws.elb.un_healthy_host_count.maximum${module.filter-tags.query_alert} by {region,loadbalancername} )
|
||||
) * 100 < 1
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
type = "query alert"
|
||||
|
||||
thresholds {
|
||||
thresholds = {
|
||||
critical = 1
|
||||
warning = 100
|
||||
warning = 100
|
||||
}
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = true
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elb", "team:claranet", "created-by:terraform", "${var.elb_no_healthy_instance_extra_tags}"]
|
||||
# Terraform 0.12 does not flatten a list placed inside a list literal, so the
# caller-supplied extra tags are appended with concat() to keep a flat list of strings.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:elb", "team:claranet", "created-by:terraform"], var.elb_no_healthy_instance_extra_tags)
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "ELB_too_much_4xx" {
|
||||
count = "${var.elb_4xx_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ELB 4xx errors too high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.elb_4xx_message, var.message)}"
|
||||
count = var.elb_4xx_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ELB 4xx errors too high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.elb_4xx_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
sum(${var.elb_4xx_timeframe}):
|
||||
default(avg:aws.elb.httpcode_elb_4xx${module.filter-tags.query_alert} by {region,loadbalancername}.as_rate(), 0) / (
|
||||
default(avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
* 100 > ${var.elb_4xx_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
type = "query alert"
|
||||
thresholds = {
|
||||
warning = var.elb_4xx_threshold_warning
|
||||
critical = var.elb_4xx_threshold_critical
|
||||
}
|
||||
|
||||
thresholds {
|
||||
warning = "${var.elb_4xx_threshold_warning}"
|
||||
critical = "${var.elb_4xx_threshold_critical}"
|
||||
}
|
||||
notify_no_data = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elb", "team:claranet", "created-by:terraform", "${var.elb_4xx_extra_tags}"]
|
||||
# Terraform 0.12 does not flatten a list placed inside a list literal, so the
# caller-supplied extra tags are appended with concat() to keep a flat list of strings.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:elb", "team:claranet", "created-by:terraform"], var.elb_4xx_extra_tags)
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "ELB_too_much_5xx" {
|
||||
count = "${var.elb_5xx_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ELB 5xx errors too high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.elb_5xx_message, var.message)}"
|
||||
count = var.elb_5xx_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ELB 5xx errors too high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.elb_5xx_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
sum(${var.elb_5xx_timeframe}):
|
||||
default(avg:aws.elb.httpcode_elb_5xx${module.filter-tags.query_alert} by {region,loadbalancername}.as_rate(), 0) / (
|
||||
default(avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
* 100 > ${var.elb_5xx_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
type = "query alert"
|
||||
thresholds = {
|
||||
warning = var.elb_5xx_threshold_warning
|
||||
critical = var.elb_5xx_threshold_critical
|
||||
}
|
||||
|
||||
thresholds {
|
||||
warning = "${var.elb_5xx_threshold_warning}"
|
||||
critical = "${var.elb_5xx_threshold_critical}"
|
||||
}
|
||||
notify_no_data = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elb", "team:claranet", "created-by:terraform", "${var.elb_5xx_extra_tags}"]
|
||||
# Terraform 0.12 does not flatten a list placed inside a list literal, so the
# caller-supplied extra tags are appended with concat() to keep a flat list of strings.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:elb", "team:claranet", "created-by:terraform"], var.elb_5xx_extra_tags)
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "ELB_too_much_4xx_backend" {
|
||||
count = "${var.elb_backend_4xx_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ELB backend 4xx errors too high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.elb_backend_4xx_message, var.message)}"
|
||||
count = var.elb_backend_4xx_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ELB backend 4xx errors too high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.elb_backend_4xx_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
sum(${var.elb_backend_4xx_timeframe}):
|
||||
default(avg:aws.elb.httpcode_backend_4xx${module.filter-tags.query_alert} by {region,loadbalancername}.as_rate(), 0) / (
|
||||
default(avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
* 100 > ${var.elb_backend_4xx_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
type = "query alert"
|
||||
|
||||
thresholds {
|
||||
warning = "${var.elb_backend_4xx_threshold_warning}"
|
||||
critical = "${var.elb_backend_4xx_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.elb_backend_4xx_threshold_warning
|
||||
critical = var.elb_backend_4xx_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elb", "team:claranet", "created-by:terraform", "${var.elb_backend_4xx_extra_tags}"]
|
||||
# Terraform 0.12 does not flatten a list placed inside a list literal, so the
# caller-supplied extra tags are appended with concat() to keep a flat list of strings.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:elb", "team:claranet", "created-by:terraform"], var.elb_backend_4xx_extra_tags)
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "ELB_too_much_5xx_backend" {
|
||||
count = "${var.elb_backend_5xx_enabled == "true" ? 1 : 0}"
|
||||
count = var.elb_backend_5xx_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ELB backend 5xx errors too high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.elb_backend_5xx_message, var.message)}"
|
||||
message = coalesce(var.elb_backend_5xx_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
sum(${var.elb_backend_5xx_timeframe}):
|
||||
default(avg:aws.elb.httpcode_backend_5xx${module.filter-tags.query_alert} by {region,loadbalancername}.as_rate(), 0) / (
|
||||
default(avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
* 100 > ${var.elb_backend_5xx_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
type = "query alert"
|
||||
|
||||
thresholds {
|
||||
warning = "${var.elb_backend_5xx_threshold_warning}"
|
||||
critical = "${var.elb_backend_5xx_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.elb_backend_5xx_threshold_warning
|
||||
critical = var.elb_backend_5xx_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elb", "team:claranet", "created-by:terraform", "${var.elb_backend_5xx_extra_tags}"]
|
||||
# Append the extra tags with concat() so tags stays a flat list: Terraform 0.12 no longer flattens a list nested inside a list literal.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:elb", "team:claranet", "created-by:terraform"], var.elb_backend_5xx_extra_tags)
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "ELB_backend_latency" {
|
||||
count = "${var.elb_backend_latency_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ELB latency too high {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}"
|
||||
message = "${coalesce(var.elb_backend_latency_message, var.message)}"
|
||||
count = var.elb_backend_latency_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ELB latency too high {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}"
|
||||
message = coalesce(var.elb_backend_latency_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
${var.elb_backend_latency_time_aggregator}(${var.elb_backend_latency_timeframe}):
|
||||
default(avg:aws.elb.latency${module.filter-tags.query_alert} by {region,loadbalancername}, 0)
|
||||
> ${var.elb_backend_latency_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
type = "query alert"
|
||||
|
||||
thresholds {
|
||||
warning = "${var.elb_backend_latency_warning}"
|
||||
critical = "${var.elb_backend_latency_critical}"
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elb", "team:claranet", "created-by:terraform", "${var.elb_backend_latency_extra_tags}"]
|
||||
thresholds = {
|
||||
warning = var.elb_backend_latency_warning
|
||||
critical = var.elb_backend_latency_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
# Append the extra tags with concat() so tags stays a flat list: Terraform 0.12 no longer flattens a list nested inside a list literal.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:elb", "team:claranet", "created-by:terraform"], var.elb_backend_latency_extra_tags)
|
||||
}
|
||||
|
||||
|
||||
@ -1,29 +1,30 @@
|
||||
output "ELB_no_healthy_instances_id" {
|
||||
description = "id for monitor ELB_no_healthy_instances"
|
||||
value = "${datadog_monitor.ELB_no_healthy_instances.*.id}"
|
||||
value = datadog_monitor.ELB_no_healthy_instances.*.id
|
||||
}
|
||||
|
||||
output "ELB_too_much_4xx_id" {
|
||||
description = "id for monitor ELB_too_much_4xx"
|
||||
value = "${datadog_monitor.ELB_too_much_4xx.*.id}"
|
||||
value = datadog_monitor.ELB_too_much_4xx.*.id
|
||||
}
|
||||
|
||||
output "ELB_too_much_5xx_id" {
|
||||
description = "id for monitor ELB_too_much_5xx"
|
||||
value = "${datadog_monitor.ELB_too_much_5xx.*.id}"
|
||||
value = datadog_monitor.ELB_too_much_5xx.*.id
|
||||
}
|
||||
|
||||
output "ELB_too_much_4xx_backend_id" {
|
||||
description = "id for monitor ELB_too_much_4xx_backend"
|
||||
value = "${datadog_monitor.ELB_too_much_4xx_backend.*.id}"
|
||||
value = datadog_monitor.ELB_too_much_4xx_backend.*.id
|
||||
}
|
||||
|
||||
output "ELB_too_much_5xx_backend_id" {
|
||||
description = "id for monitor ELB_too_much_5xx_backend"
|
||||
value = "${datadog_monitor.ELB_too_much_5xx_backend.*.id}"
|
||||
value = datadog_monitor.ELB_too_much_5xx_backend.*.id
|
||||
}
|
||||
|
||||
output "ELB_backend_latency_id" {
|
||||
description = "id for monitor ELB_backend_latency"
|
||||
value = "${datadog_monitor.ELB_backend_latency.*.id}"
|
||||
value = datadog_monitor.ELB_backend_latency.*.id
|
||||
}
|
||||
|
||||
|
||||
4
cloud/aws/elb/versions.tf
Normal file
4
cloud/aws/elb/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
# Global Terraform
|
||||
variable "environment" {
|
||||
description = "Environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
# Global DataDog
|
||||
@ -43,19 +43,19 @@ variable "filter_tags_custom_excluded" {
|
||||
|
||||
variable "incoming_records_enabled" {
|
||||
description = "Flag to enable Kinesis Firehose incoming records monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "incoming_records_extra_tags" {
|
||||
description = "Extra tags for Kinesis Firehose incoming records monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "incoming_records_message" {
|
||||
description = "Custom message for Kinesis Firehose incoming records monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
@ -63,3 +63,4 @@ variable "incoming_records_timeframe" {
|
||||
description = "Monitor timeframe for incoming records metrics evaluation [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "aws_kinesis-firehose"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -1,30 +1,30 @@
|
||||
### Kinesis Firehose Incoming records ###
|
||||
resource "datadog_monitor" "firehose_incoming_records" {
|
||||
count = "${var.incoming_records_enabled == "true" ? 1 : 0}"
|
||||
count = var.incoming_records_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Kinesis Firehose No incoming records"
|
||||
message = "${coalesce(var.incoming_records_message, var.message)}"
|
||||
|
||||
message = coalesce(var.incoming_records_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
sum(${var.incoming_records_timeframe}): (
|
||||
avg:aws.firehose.incoming_records${module.filter-tags.query_alert} by {region,deliverystreamname}
|
||||
) <= 0
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
thresholds = {
|
||||
critical = 0
|
||||
}
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = true
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:kinesis-firehose", "team:claranet", "created-by:terraform", "${var.incoming_records_extra_tags}"]
|
||||
# incoming_records_extra_tags is a list(string); concat() keeps tags a flat list, since Terraform 0.12 no longer flattens a list nested inside a list literal.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:kinesis-firehose", "team:claranet", "created-by:terraform"], var.incoming_records_extra_tags)
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
output "firehose_incoming_records_id" {
|
||||
description = "id for monitor firehose_incoming_records"
|
||||
value = "${datadog_monitor.firehose_incoming_records.*.id}"
|
||||
value = datadog_monitor.firehose_incoming_records.*.id
|
||||
}
|
||||
|
||||
|
||||
4
cloud/aws/kinesis-firehose/versions.tf
Normal file
4
cloud/aws/kinesis-firehose/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
# Global Terraform
|
||||
variable "environment" {
|
||||
description = "Architecture Environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
# Global DataDog
|
||||
@ -43,25 +43,25 @@ variable "filter_tags_custom_excluded" {
|
||||
|
||||
variable "aurora_replicalag_enabled" {
|
||||
description = "Flag to enable RDS Aurora replica lag monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "aurora_replicalag_extra_tags" {
|
||||
description = "Extra tags for RDS Aurora replica lag monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "aurora_replicalag_message" {
|
||||
description = "Custom message for RDS Aurora replica lag monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "aurora_replicalag_timeframe" {
|
||||
description = "Monitor timeframe for RDS Aurora replica lag monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -74,3 +74,4 @@ variable "aurora_replicalag_threshold_critical" {
|
||||
description = "Aurora replica lag in milliseconds (critical threshold)"
|
||||
default = "200"
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "aws_rds"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -1,30 +1,30 @@
|
||||
### RDS Aurora Mysql Replica Lag monitor ###
|
||||
resource "datadog_monitor" "rds_aurora_mysql_replica_lag" {
|
||||
count = "${var.aurora_replicalag_enabled == "true" ? 1 : 0}"
|
||||
count = var.aurora_replicalag_enabled == "true" ? 1 : 0
|
||||
# The replica lag thresholds are expressed in milliseconds, so the measured value is reported in ms as well (not as a percentage).
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] RDS Aurora Mysql replica lag {{#is_alert}}{{{comparator}}} {{threshold}} ms ({{value}} ms){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ms ({{value}} ms){{/is_warning}}"
|
||||
message = "${coalesce(var.aurora_replicalag_message, var.message)}"
|
||||
|
||||
message = coalesce(var.aurora_replicalag_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
avg(${var.aurora_replicalag_timeframe}): (
|
||||
avg:aws.rds.aurora_replica_lag${module.filter-tags.query_alert} by {region,name}
|
||||
) > ${var.aurora_replicalag_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.aurora_replicalag_threshold_warning}"
|
||||
critical = "${var.aurora_replicalag_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.aurora_replicalag_threshold_warning
|
||||
critical = var.aurora_replicalag_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = true
|
||||
evaluation_delay = var.evaluation_delay
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:rds-aurora-mysql", "team:claranet", "created-by:terraform", "${var.aurora_replicalag_extra_tags}"]
|
||||
# aurora_replicalag_extra_tags is a list(string); concat() keeps tags a flat list, since Terraform 0.12 no longer flattens a list nested inside a list literal.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:rds-aurora-mysql", "team:claranet", "created-by:terraform"], var.aurora_replicalag_extra_tags)
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
output "rds_aurora_mysql_replica_lag_id" {
|
||||
description = "id for monitor rds_aurora_mysql_replica_lag"
|
||||
value = "${datadog_monitor.rds_aurora_mysql_replica_lag.*.id}"
|
||||
value = datadog_monitor.rds_aurora_mysql_replica_lag.*.id
|
||||
}
|
||||
|
||||
|
||||
4
cloud/aws/rds/aurora/mysql/versions.tf
Normal file
4
cloud/aws/rds/aurora/mysql/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
# Global Terraform
|
||||
variable "environment" {
|
||||
description = "Architecture Environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
# Global DataDog
|
||||
@ -43,25 +43,25 @@ variable "filter_tags_custom_excluded" {
|
||||
|
||||
variable "aurora_replicalag_enabled" {
|
||||
description = "Flag to enable RDS Aurora replica lag monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "aurora_replicalag_extra_tags" {
|
||||
description = "Extra tags for RDS Aurora replica lag monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "aurora_replicalag_message" {
|
||||
description = "Custom message for RDS Aurora replica lag monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "aurora_replicalag_timeframe" {
|
||||
description = "Monitor timeframe for RDS Aurora replica lag monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -74,3 +74,4 @@ variable "aurora_replicalag_threshold_critical" {
|
||||
description = "Aurora replica lag in milliseconds (critical threshold)"
|
||||
default = "200"
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "aws_rds"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -1,30 +1,30 @@
|
||||
### RDS Aurora Postgresql Replica Lag monitor ###
|
||||
resource "datadog_monitor" "rds_aurora_postgresql_replica_lag" {
|
||||
count = "${var.aurora_replicalag_enabled == "true" ? 1 : 0}"
|
||||
count = var.aurora_replicalag_enabled == "true" ? 1 : 0
|
||||
# The replica lag thresholds are expressed in milliseconds, so the measured value is reported in ms as well (not as a percentage).
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] RDS Aurora PostgreSQL replica lag {{#is_alert}}{{{comparator}}} {{threshold}} ms ({{value}} ms){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ms ({{value}} ms){{/is_warning}}"
|
||||
message = "${coalesce(var.aurora_replicalag_message, var.message)}"
|
||||
|
||||
message = coalesce(var.aurora_replicalag_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
avg(${var.aurora_replicalag_timeframe}): (
|
||||
avg:aws.rds.rdsto_aurora_postgre_sqlreplica_lag${module.filter-tags.query_alert} by {region,name}
|
||||
) > ${var.aurora_replicalag_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.aurora_replicalag_threshold_warning}"
|
||||
critical = "${var.aurora_replicalag_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.aurora_replicalag_threshold_warning
|
||||
critical = var.aurora_replicalag_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = true
|
||||
evaluation_delay = var.evaluation_delay
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:rds-aurora-postgresql", "team:claranet", "created-by:terraform", "${var.aurora_replicalag_extra_tags}"]
|
||||
# aurora_replicalag_extra_tags is a list(string); concat() keeps tags a flat list, since Terraform 0.12 no longer flattens a list nested inside a list literal.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:rds-aurora-postgresql", "team:claranet", "created-by:terraform"], var.aurora_replicalag_extra_tags)
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
output "rds_aurora_postgresql_replica_lag_id" {
|
||||
description = "id for monitor rds_aurora_postgresql_replica_lag"
|
||||
value = "${datadog_monitor.rds_aurora_postgresql_replica_lag.*.id}"
|
||||
value = datadog_monitor.rds_aurora_postgresql_replica_lag.*.id
|
||||
}
|
||||
|
||||
|
||||
4
cloud/aws/rds/aurora/postgresql/versions.tf
Normal file
4
cloud/aws/rds/aurora/postgresql/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
# Global Terraform
|
||||
variable "environment" {
|
||||
description = "Architecture Environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
# Global DataDog
|
||||
@ -43,31 +43,31 @@ variable "filter_tags_custom_excluded" {
|
||||
|
||||
variable "cpu_enabled" {
|
||||
description = "Flag to enable RDS CPU usage monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "cpu_extra_tags" {
|
||||
description = "Extra tags for RDS CPU usage monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "cpu_message" {
|
||||
description = "Custom message for RDS CPU usage monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cpu_time_aggregator" {
|
||||
description = "Monitor aggregator for RDS CPU usage [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "cpu_timeframe" {
|
||||
description = "Monitor timeframe for RDS CPU usage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
@ -83,31 +83,31 @@ variable "cpu_threshold_critical" {
|
||||
|
||||
variable "diskspace_enabled" {
|
||||
description = "Flag to enable RDS free diskspace monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "diskspace_extra_tags" {
|
||||
description = "Extra tags for RDS free diskspace monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "diskspace_message" {
|
||||
description = "Custom message for RDS free diskspace monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "diskspace_time_aggregator" {
|
||||
description = "Monitor aggregator for RDS free diskspace [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "diskspace_timeframe" {
|
||||
description = "Monitor timeframe for RDS free diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
@ -123,25 +123,25 @@ variable "diskspace_threshold_critical" {
|
||||
|
||||
variable "replicalag_enabled" {
|
||||
description = "Flag to enable RDS replica lag monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "replicalag_extra_tags" {
|
||||
description = "Extra tags for RDS replica lag monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "replicalag_message" {
|
||||
description = "Custom message for RDS replica lag monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "replicalag_timeframe" {
|
||||
description = "Monitor timeframe for RDS replica lag monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -154,3 +154,4 @@ variable "replicalag_threshold_critical" {
|
||||
description = "replica lag in seconds (critical threshold)"
|
||||
default = "300"
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "aws_rds"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -1,40 +1,38 @@
|
||||
### RDS instance CPU monitor ###
|
||||
resource "datadog_monitor" "rds_cpu_90_15min" {
|
||||
count = "${var.cpu_enabled == "true" ? 1 : 0}"
|
||||
count = var.cpu_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] RDS instance CPU high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.cpu_message, var.message)}"
|
||||
|
||||
message = coalesce(var.cpu_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
${var.cpu_time_aggregator}(${var.cpu_timeframe}): (
|
||||
avg:aws.rds.cpuutilization${module.filter-tags.query_alert} by {region,name}
|
||||
) > ${var.cpu_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.cpu_threshold_warning}"
|
||||
critical = "${var.cpu_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.cpu_threshold_warning
|
||||
critical = var.cpu_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:rds", "team:claranet", "created-by:terraform", "${var.cpu_extra_tags}"]
|
||||
# cpu_extra_tags is a list(string); concat() keeps tags a flat list, since Terraform 0.12 no longer flattens a list nested inside a list literal.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:rds", "team:claranet", "created-by:terraform"], var.cpu_extra_tags)
|
||||
}
|
||||
|
||||
### RDS instance free space monitor ###
|
||||
resource "datadog_monitor" "rds_free_space_low" {
|
||||
count = "${var.diskspace_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] RDS instance free space {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.diskspace_message, var.message)}"
|
||||
|
||||
count = var.diskspace_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] RDS instance free space {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.diskspace_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
@ -42,52 +40,52 @@ resource "datadog_monitor" "rds_free_space_low" {
|
||||
avg:aws.rds.free_storage_space${module.filter-tags.query_alert} by {region,name} /
|
||||
avg:aws.rds.total_storage_space${module.filter-tags.query_alert} by {region,name} * 100
|
||||
) < ${var.diskspace_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.diskspace_threshold_warning}"
|
||||
critical = "${var.diskspace_threshold_critical}"
|
||||
}
|
||||
thresholds = {
|
||||
warning = var.diskspace_threshold_warning
|
||||
critical = var.diskspace_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
notify_no_data = true
|
||||
evaluation_delay = var.evaluation_delay
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:rds", "team:claranet", "created-by:terraform", "${var.diskspace_extra_tags}"]
|
||||
# diskspace_extra_tags is a list(string); concat() keeps tags a flat list, since Terraform 0.12 no longer flattens a list nested inside a list literal.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:rds", "team:claranet", "created-by:terraform"], var.diskspace_extra_tags)
|
||||
}
|
||||
|
||||
### RDS Replica Lag monitor ###
|
||||
resource "datadog_monitor" "rds_replica_lag" {
|
||||
count = "${var.replicalag_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] RDS replica lag {{#is_alert}}{{{comparator}}} {{threshold}} ms ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ms ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.replicalag_message, var.message)}"
|
||||
|
||||
count = var.replicalag_enabled == "true" ? 1 : 0
|
||||
# NOTE(review): the threshold variable is described as "replica lag in seconds", so the title reports seconds rather than "ms"/"%" — confirm the metric unit before applying.
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] RDS replica lag {{#is_alert}}{{{comparator}}} {{threshold}} s ({{value}} s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} s ({{value}} s){{/is_warning}}"
|
||||
message = coalesce(var.replicalag_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
avg(${var.replicalag_timeframe}): (
|
||||
avg:aws.rds.replica_lag${module.filter-tags.query_alert} by {region,name}
|
||||
) > ${var.replicalag_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
warning = "${var.replicalag_threshold_warning}"
|
||||
critical = "${var.replicalag_threshold_critical}"
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:rds", "team:claranet", "created-by:terraform", "${var.replicalag_extra_tags}"]
|
||||
thresholds = {
|
||||
warning = var.replicalag_threshold_warning
|
||||
critical = var.replicalag_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = var.evaluation_delay
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
# replicalag_extra_tags is a list(string); concat() keeps tags a flat list, since Terraform 0.12 no longer flattens a list nested inside a list literal.
tags = concat(["env:${var.environment}", "type:cloud", "provider:aws", "resource:rds", "team:claranet", "created-by:terraform"], var.replicalag_extra_tags)
|
||||
}
|
||||
|
||||
|
||||
@ -1,14 +1,15 @@
|
||||
output "rds_cpu_90_15min_id" {
|
||||
description = "id for monitor rds_cpu_90_15min"
|
||||
value = "${datadog_monitor.rds_cpu_90_15min.*.id}"
|
||||
value = datadog_monitor.rds_cpu_90_15min.*.id
|
||||
}
|
||||
|
||||
output "rds_free_space_low_id" {
|
||||
description = "id for monitor rds_free_space_low"
|
||||
value = "${datadog_monitor.rds_free_space_low.*.id}"
|
||||
value = datadog_monitor.rds_free_space_low.*.id
|
||||
}
|
||||
|
||||
output "rds_replica_lag_id" {
|
||||
description = "id for monitor rds_replica_lag"
|
||||
value = "${datadog_monitor.rds_replica_lag.*.id}"
|
||||
value = datadog_monitor.rds_replica_lag.*.id
|
||||
}
|
||||
|
||||
|
||||
4
cloud/aws/rds/common/versions.tf
Normal file
4
cloud/aws/rds/common/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
# Global Terraform
|
||||
variable "environment" {
|
||||
description = "Architecture Environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
# Global DataDog
|
||||
@ -31,30 +31,31 @@ variable "filter_tags" {
|
||||
|
||||
variable "vpn_status_enabled" {
|
||||
description = "Flag to enable VPN status monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "vpn_status_extra_tags" {
|
||||
description = "Extra tags for VPN status monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "vpn_status_message" {
|
||||
description = "Custom message for VPN status monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "vpn_status_time_aggregator" {
|
||||
description = "Monitor aggregator for VPN status [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "vpn_status_timeframe" {
|
||||
description = "Monitor timeframe for VPN status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
|
||||
@ -1,24 +1,24 @@
|
||||
resource "datadog_monitor" "VPN_status" {
|
||||
count = "${var.vpn_status_enabled == "true" ? 1 : 0}"
|
||||
count = var.vpn_status_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] VPN tunnel down"
|
||||
message = "${coalesce(var.vpn_status_message, var.message)}"
|
||||
message = coalesce(var.vpn_status_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
${var.vpn_status_time_aggregator}(${var.vpn_status_timeframe}): (
|
||||
min:aws.vpn.tunnel_state{${var.filter_tags}} by {region,tunnelipaddress}
|
||||
) < 1
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
type = "query alert"
|
||||
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
require_full_window = false
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:vpn", "team:claranet", "created-by:terraform", "${var.vpn_status_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:vpn", "team:claranet", "created-by:terraform", var.vpn_status_extra_tags]
|
||||
}
|
||||
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
output "VPN_status_id" {
|
||||
description = "id for monitor VPN_status"
|
||||
value = "${datadog_monitor.VPN_status.*.id}"
|
||||
value = datadog_monitor.VPN_status.*.id
|
||||
}
|
||||
|
||||
|
||||
4
cloud/aws/vpn/versions.tf
Normal file
4
cloud/aws/vpn/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
# Global Terraform
|
||||
variable "environment" {
|
||||
description = "Architecture environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
# Global DataDog
|
||||
@ -43,61 +43,61 @@ variable "filter_tags_custom_excluded" {
|
||||
|
||||
variable "status_enabled" {
|
||||
description = "Flag to enable API Management status monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "status_extra_tags" {
|
||||
description = "Extra tags for API Management status monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "status_message" {
|
||||
description = "Custom message for API Management status monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "status_time_aggregator" {
|
||||
description = "Monitor aggregator for API Management status [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "status_timeframe" {
|
||||
description = "Monitor timeframe for API Management status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "failed_requests_enabled" {
|
||||
description = "Flag to enable API Management failed requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "failed_requests_extra_tags" {
|
||||
description = "Extra tags for API Management failed requests monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "failed_requests_message" {
|
||||
description = "Custom message for API Management failed requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "failed_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for API Management failed requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "failed_requests_timeframe" {
|
||||
description = "Monitor timeframe for API Management failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -113,31 +113,31 @@ variable "failed_requests_threshold_warning" {
|
||||
|
||||
variable "other_requests_enabled" {
|
||||
description = "Flag to enable API Management other requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "other_requests_extra_tags" {
|
||||
description = "Extra tags for API Management other requests monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "other_requests_message" {
|
||||
description = "Custom message for API Management other requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "other_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for API Management other requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "other_requests_timeframe" {
|
||||
description = "Monitor timeframe for API Management other requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -153,31 +153,31 @@ variable "other_requests_threshold_warning" {
|
||||
|
||||
variable "unauthorized_requests_enabled" {
|
||||
description = "Flag to enable API Management unauthorized requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "unauthorized_requests_extra_tags" {
|
||||
description = "Extra tags for API Management unauthorized requests monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "unauthorized_requests_message" {
|
||||
description = "Custom message for API Management unauthorized requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "unauthorized_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for API Management unauthorized requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "unauthorized_requests_timeframe" {
|
||||
description = "Monitor timeframe for API Management unauthorized requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -193,31 +193,31 @@ variable "unauthorized_requests_threshold_warning" {
|
||||
|
||||
variable "successful_requests_enabled" {
|
||||
description = "Flag to enable API Management successful requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "successful_requests_extra_tags" {
|
||||
description = "Extra tags for API Management successful requests monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "successful_requests_message" {
|
||||
description = "Custom message for API Management successful requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "successful_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for API Management successful requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "successful_requests_timeframe" {
|
||||
description = "Monitor timeframe for API Management successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -230,3 +230,4 @@ variable "successful_requests_threshold_warning" {
|
||||
description = "Warning regarding acceptable percent of successful requests"
|
||||
default = 30
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "azure_apimanagement"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -1,128 +1,128 @@
|
||||
resource "datadog_monitor" "apimgt_status" {
|
||||
count = "${var.status_enabled == "true" ? 1 : 0}"
|
||||
count = var.status_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] API Management is down"
|
||||
message = "${coalesce(var.status_message, var.message)}"
|
||||
message = coalesce(var.status_message, var.message)
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOQ
|
||||
${var.status_time_aggregator}(${var.status_timeframe}):avg:azure.apimanagement_service.status${module.filter-tags.query_alert} by {resource_group,region,name} < 1
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
thresholds {
|
||||
thresholds = {
|
||||
critical = 1
|
||||
}
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = true
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:apimanagement", "team:claranet", "created-by:terraform", "${var.status_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:apimanagement", "team:claranet", "created-by:terraform", var.status_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "apimgt_failed_requests" {
|
||||
count = "${var.failed_requests_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] API Management too many failed requests {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.failed_requests_message, var.message)}"
|
||||
count = var.failed_requests_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] API Management too many failed requests {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.failed_requests_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
${var.failed_requests_time_aggregator}(${var.failed_requests_timeframe}): (
|
||||
default(avg:azure.apimanagement_service.failed_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
) * 100 > ${var.failed_requests_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
critical = "${var.failed_requests_threshold_critical}"
|
||||
warning = "${var.failed_requests_threshold_warning}"
|
||||
}
|
||||
thresholds = {
|
||||
critical = var.failed_requests_threshold_critical
|
||||
warning = var.failed_requests_threshold_warning
|
||||
}
|
||||
|
||||
type = "query alert"
|
||||
notify_no_data = false
|
||||
notify_audit = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_no_data = false
|
||||
notify_audit = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = var.new_host_delay
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:apimanagement", "team:claranet", "created-by:terraform", "${var.failed_requests_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:apimanagement", "team:claranet", "created-by:terraform", var.failed_requests_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "apimgt_other_requests" {
|
||||
count = "${var.other_requests_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] API Management too many other requests {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.other_requests_message, var.message)}"
|
||||
count = var.other_requests_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] API Management too many other requests {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.other_requests_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.other_requests_time_aggregator}(${var.other_requests_timeframe}): (
|
||||
default(avg:azure.apimanagement_service.other_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
) * 100 > ${var.other_requests_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
critical = "${var.other_requests_threshold_critical}"
|
||||
warning = "${var.other_requests_threshold_warning}"
|
||||
}
|
||||
thresholds = {
|
||||
critical = var.other_requests_threshold_critical
|
||||
warning = var.other_requests_threshold_warning
|
||||
}
|
||||
|
||||
type = "query alert"
|
||||
notify_no_data = false
|
||||
notify_audit = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_no_data = false
|
||||
notify_audit = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = var.new_host_delay
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:apimanagement", "team:claranet", "created-by:terraform", "${var.other_requests_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:apimanagement", "team:claranet", "created-by:terraform", var.other_requests_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "apimgt_unauthorized_requests" {
|
||||
count = "${var.unauthorized_requests_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] API Management too many unauthorized requests {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.unauthorized_requests_message, var.message)}"
|
||||
count = var.unauthorized_requests_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] API Management too many unauthorized requests {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.unauthorized_requests_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.unauthorized_requests_time_aggregator}(${var.unauthorized_requests_timeframe}): (
|
||||
default(avg:azure.apimanagement_service.unauthorized_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
) * 100 > ${var.unauthorized_requests_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
critical = "${var.unauthorized_requests_threshold_critical}"
|
||||
warning = "${var.unauthorized_requests_threshold_warning}"
|
||||
thresholds = {
|
||||
critical = var.unauthorized_requests_threshold_critical
|
||||
warning = var.unauthorized_requests_threshold_warning
|
||||
}
|
||||
|
||||
type = "query alert"
|
||||
notify_no_data = false
|
||||
notify_audit = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = var.new_host_delay
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:apimanagement", "team:claranet", "created-by:terraform", "${var.unauthorized_requests_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:apimanagement", "team:claranet", "created-by:terraform", var.unauthorized_requests_extra_tags]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "apimgt_successful_requests" {
|
||||
count = "${var.successful_requests_enabled == "true" ? 1 : 0}"
|
||||
count = var.successful_requests_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] API Management successful requests rate too low {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.successful_requests_message, var.message)}"
|
||||
message = coalesce(var.successful_requests_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
${var.successful_requests_time_aggregator}(${var.successful_requests_timeframe}):
|
||||
@ -131,23 +131,23 @@ resource "datadog_monitor" "apimgt_successful_requests" {
|
||||
avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate()
|
||||
* 100
|
||||
, 100) < ${var.successful_requests_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
thresholds {
|
||||
critical = "${var.successful_requests_threshold_critical}"
|
||||
warning = "${var.successful_requests_threshold_warning}"
|
||||
thresholds = {
|
||||
critical = var.successful_requests_threshold_critical
|
||||
warning = var.successful_requests_threshold_warning
|
||||
}
|
||||
|
||||
type = "query alert"
|
||||
notify_no_data = false
|
||||
notify_audit = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
locked = false
|
||||
notify_no_data = false
|
||||
notify_audit = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
new_host_delay = var.new_host_delay
|
||||
evaluation_delay = var.evaluation_delay
|
||||
renotify_interval = 0
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:apimanagement", "team:claranet", "created-by:terraform", "${var.successful_requests_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:apimanagement", "team:claranet", "created-by:terraform", var.successful_requests_extra_tags]
|
||||
}
|
||||
|
||||
|
||||
@ -1,24 +1,25 @@
|
||||
output "apimgt_status_id" {
|
||||
description = "id for monitor apimgt_status"
|
||||
value = "${datadog_monitor.apimgt_status.*.id}"
|
||||
value = datadog_monitor.apimgt_status.*.id
|
||||
}
|
||||
|
||||
output "apimgt_failed_requests_id" {
|
||||
description = "id for monitor apimgt_failed_requests"
|
||||
value = "${datadog_monitor.apimgt_failed_requests.*.id}"
|
||||
value = datadog_monitor.apimgt_failed_requests.*.id
|
||||
}
|
||||
|
||||
output "apimgt_other_requests_id" {
|
||||
description = "id for monitor apimgt_other_requests"
|
||||
value = "${datadog_monitor.apimgt_other_requests.*.id}"
|
||||
value = datadog_monitor.apimgt_other_requests.*.id
|
||||
}
|
||||
|
||||
output "apimgt_unauthorized_requests_id" {
|
||||
description = "id for monitor apimgt_unauthorized_requests"
|
||||
value = "${datadog_monitor.apimgt_unauthorized_requests.*.id}"
|
||||
value = datadog_monitor.apimgt_unauthorized_requests.*.id
|
||||
}
|
||||
|
||||
output "apimgt_successful_requests_id" {
|
||||
description = "id for monitor apimgt_successful_requests"
|
||||
value = "${datadog_monitor.apimgt_successful_requests.*.id}"
|
||||
value = datadog_monitor.apimgt_successful_requests.*.id
|
||||
}
|
||||
|
||||
|
||||
4
cloud/azure/apimanagement/versions.tf
Normal file
4
cloud/azure/apimanagement/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,6 +1,6 @@
|
||||
variable "environment" {
|
||||
description = "Architecture environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "filter_tags_use_defaults" {
|
||||
@ -41,31 +41,31 @@ variable "prefix_slug" {
|
||||
|
||||
variable "response_time_enabled" {
|
||||
description = "Flag to enable App Services response time monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "response_time_extra_tags" {
|
||||
description = "Extra tags for App Services response time monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "response_time_message" {
|
||||
description = "Custom message for App Services response time monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "response_time_time_aggregator" {
|
||||
description = "Monitor aggregator for App Services response time [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "response_time_timeframe" {
|
||||
description = "Monitor timeframe for App Services response time [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -81,71 +81,71 @@ variable "response_time_threshold_warning" {
|
||||
|
||||
variable "memory_usage_enabled" {
|
||||
description = "Flag to enable App Services memory usage monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "memory_usage_extra_tags" {
|
||||
description = "Extra tags for App Services memory usage monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "memory_usage_message" {
|
||||
description = "Custom message for App Services memory usage monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "memory_usage_time_aggregator" {
|
||||
description = "Monitor aggregator for App Services memory usage [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "memory_usage_timeframe" {
|
||||
description = "Monitor timeframe for App Services memory usage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "memory_usage_threshold_critical" {
|
||||
default = 1073741824 # 1Gb
|
||||
default = 1073741824 # 1Gb
|
||||
description = "Alerting threshold in Mib"
|
||||
}
|
||||
|
||||
variable "memory_usage_threshold_warning" {
|
||||
default = 536870912 # 512Mb
|
||||
default = 536870912 # 512Mb
|
||||
description = "Warning threshold in MiB"
|
||||
}
|
||||
|
||||
variable "http_4xx_requests_enabled" {
|
||||
description = "Flag to enable App Services 4xx requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "http_4xx_requests_extra_tags" {
|
||||
description = "Extra tags for App Services 4xx requests monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "http_4xx_requests_message" {
|
||||
description = "Custom message for App Services 4xx requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "http_4xx_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for App Services 4xx requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "http_4xx_requests_timeframe" {
|
||||
description = "Monitor timeframe for App Services 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -161,31 +161,31 @@ variable "http_4xx_requests_threshold_warning" {
|
||||
|
||||
variable "http_5xx_requests_enabled" {
|
||||
description = "Flag to enable App Services 5xx requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "http_5xx_requests_extra_tags" {
|
||||
description = "Extra tags for App Services 5xx requests monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "http_5xx_requests_message" {
|
||||
description = "Custom message for App Services 5xx requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "http_5xx_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for App Services 5xx requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "http_5xx_requests_timeframe" {
|
||||
description = "Monitor timeframe for App Services 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -201,31 +201,31 @@ variable "http_5xx_requests_threshold_warning" {
|
||||
|
||||
variable "http_successful_requests_enabled" {
|
||||
description = "Flag to enable App Services successful requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "http_successful_requests_extra_tags" {
|
||||
description = "Extra tags for App Services successful requests monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "http_successful_requests_message" {
|
||||
description = "Custom message for App Services successful requests monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "http_successful_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for App Services successful requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "http_successful_requests_timeframe" {
|
||||
description = "Monitor timeframe for App Services successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -241,30 +241,31 @@ variable "http_successful_requests_threshold_warning" {
|
||||
|
||||
variable "status_enabled" {
|
||||
description = "Flag to enable App Services status monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "status_message" {
|
||||
description = "Custom message for App Services status monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "status_extra_tags" {
|
||||
description = "Extra tags for App Services status monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "status_time_aggregator" {
|
||||
description = "Monitor aggregator for App Services status [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "status_timeframe" {
|
||||
description = "Monitor timeframe for App Services status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
environment = var.environment
|
||||
resource = "azure_app-services"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
filter_tags_custom_excluded = "${var.filter_tags_custom_excluded}"
|
||||
filter_tags_use_defaults = var.filter_tags_use_defaults
|
||||
filter_tags_custom = var.filter_tags_custom
|
||||
filter_tags_custom_excluded = var.filter_tags_custom_excluded
|
||||
}
|
||||
|
||||
|
||||
@ -1,131 +1,127 @@
|
||||
# Monitoring App Services response time
|
||||
resource "datadog_monitor" "appservices_response_time" {
|
||||
count = "${var.response_time_enabled == "true" ? 1 : 0}"
|
||||
count = var.response_time_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] App Services response time too high {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}"
|
||||
message = coalesce(var.response_time_message, var.message)
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.response_time_message, var.message)}"
|
||||
|
||||
query = <<EOQ
|
||||
${var.response_time_time_aggregator}(${var.response_time_timeframe}): (
|
||||
default(avg:azure.app_services.average_response_time${module.filter-tags.query_alert} by {resource_group,region,name,instance}, 0)
|
||||
) > ${var.response_time_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
thresholds {
|
||||
warning = "${var.response_time_threshold_warning}"
|
||||
critical = "${var.response_time_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.response_time_threshold_warning
|
||||
critical = var.response_time_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false # Will NOT notify when no data is received
|
||||
renotify_interval = 0
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", "${var.response_time_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", var.response_time_extra_tags]
|
||||
}
|
||||
|
||||
# Monitoring App Services memory usage
|
||||
resource "datadog_monitor" "appservices_memory_usage_count" {
|
||||
count = "${var.memory_usage_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] App Services memory usage {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}"
|
||||
count = var.memory_usage_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] App Services memory usage {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}"
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.memory_usage_message, var.message)}"
|
||||
message = coalesce(var.memory_usage_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
${var.memory_usage_time_aggregator}(${var.memory_usage_timeframe}): (
|
||||
avg:azure.app_services.memory_working_set${module.filter-tags.query_alert} by {resource_group,region,name,instance}
|
||||
) > ${var.memory_usage_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
thresholds = {
|
||||
warning = var.memory_usage_threshold_warning
|
||||
critical = var.memory_usage_threshold_critical
|
||||
}
|
||||
|
||||
thresholds {
|
||||
warning = "${var.memory_usage_threshold_warning}"
|
||||
critical = "${var.memory_usage_threshold_critical}"
|
||||
}
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
|
||||
notify_no_data = false # Will NOT notify when no data is received
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", "${var.memory_usage_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", var.memory_usage_extra_tags]
|
||||
}
|
||||
|
||||
# Monitoring App Services 5xx errors percent
|
||||
resource "datadog_monitor" "appservices_http_5xx_errors_count" {
|
||||
count = "${var.http_5xx_requests_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] App Services HTTP 5xx errors too high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
count = var.http_5xx_requests_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] App Services HTTP 5xx errors too high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.http_5xx_requests_message, var.message)
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.http_5xx_requests_message, var.message)}"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.http_5xx_requests_time_aggregator}(${var.http_5xx_requests_timeframe}): (
|
||||
default(avg:azure.app_services.http5xx${module.filter-tags.query_alert} by {resource_group,region,name,instance}.as_rate(), 0) /
|
||||
default(avg:azure.app_services.requests${module.filter-tags.query_alert} by {resource_group,region,name,instance}.as_rate(), 1)
|
||||
) * 100 > ${var.http_5xx_requests_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
thresholds = {
|
||||
warning = var.http_5xx_requests_threshold_warning
|
||||
critical = var.http_5xx_requests_threshold_critical
|
||||
}
|
||||
|
||||
thresholds {
|
||||
warning = "${var.http_5xx_requests_threshold_warning}"
|
||||
critical = "${var.http_5xx_requests_threshold_critical}"
|
||||
}
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
|
||||
notify_no_data = false # Will NOT notify when no data is received
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", "${var.http_5xx_requests_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", var.http_5xx_requests_extra_tags]
|
||||
}
|
||||
|
||||
# Monitoring App Services 4xx errors percent
|
||||
resource "datadog_monitor" "appservices_http_4xx_errors_count" {
|
||||
count = "${var.http_4xx_requests_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] App Services HTTP 4xx errors too high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
count = var.http_4xx_requests_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] App Services HTTP 4xx errors too high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.http_4xx_requests_message, var.message)}"
|
||||
message = coalesce(var.http_4xx_requests_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.http_4xx_requests_time_aggregator}(${var.http_4xx_requests_timeframe}): (
|
||||
default(avg:azure.app_services.http4xx${module.filter-tags.query_alert} by {resource_group,region,name,instance}.as_rate(), 0) /
|
||||
default(avg:azure.app_services.requests${module.filter-tags.query_alert} by {resource_group,region,name,instance}.as_rate(), 1)
|
||||
) * 100 > ${var.http_4xx_requests_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
thresholds {
|
||||
warning = "${var.http_4xx_requests_threshold_warning}"
|
||||
critical = "${var.http_4xx_requests_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.http_4xx_requests_threshold_warning
|
||||
critical = var.http_4xx_requests_threshold_critical
|
||||
}
|
||||
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false # Will NOT notify when no data is received
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", "${var.http_4xx_requests_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", var.http_4xx_requests_extra_tags]
|
||||
}
|
||||
|
||||
# Monitoring App Services HTTP 2xx & 3xx status pages percent
|
||||
resource "datadog_monitor" "appservices_http_success_status_rate" {
|
||||
count = "${var.http_successful_requests_enabled == "true" ? 1 : 0}"
|
||||
count = var.http_successful_requests_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] App Services HTTP successful responses too low {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
type = "query alert"
|
||||
message = "${coalesce(var.http_successful_requests_message, var.message)}"
|
||||
message = coalesce(var.http_successful_requests_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
${var.http_successful_requests_time_aggregator}(${var.http_successful_requests_timeframe}):
|
||||
@ -134,47 +130,46 @@ resource "datadog_monitor" "appservices_http_success_status_rate" {
|
||||
default(avg:azure.app_services.http3xx${module.filter-tags.query_alert} by {resource_group,region,name,instance}.as_rate(), 0) ) /
|
||||
default(avg:azure.app_services.requests${module.filter-tags.query_alert} by {resource_group,region,name,instance}.as_rate(), 0)
|
||||
) * 100, 100) < ${var.http_successful_requests_threshold_critical}
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
thresholds {
|
||||
warning = "${var.http_successful_requests_threshold_warning}"
|
||||
critical = "${var.http_successful_requests_threshold_critical}"
|
||||
thresholds = {
|
||||
warning = var.http_successful_requests_threshold_warning
|
||||
critical = var.http_successful_requests_threshold_critical
|
||||
}
|
||||
|
||||
notify_no_data = false # Will NOT notify when no data is received
|
||||
renotify_interval = 0
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
timeout_h = 1
|
||||
include_tags = true
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", "${var.http_successful_requests_extra_tags}"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", var.http_successful_requests_extra_tags]
|
||||
}
|
||||
|
||||
# Monitoring App Services status
|
||||
resource "datadog_monitor" "appservices_status" {
|
||||
count = "${var.status_enabled == "true" ? 1 : 0}"
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] App Services is down"
|
||||
type = "metric alert"
|
||||
message = "${coalesce(var.status_message, var.message)}"
|
||||
count = var.status_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] App Services is down"
|
||||
type = "metric alert"
|
||||
message = coalesce(var.status_message, var.message)
|
||||
|
||||
query = <<EOQ
|
||||
${var.status_time_aggregator}(${var.status_timeframe}):avg:azure.app_services.status${module.filter-tags.query_alert} by {resource_group,region,name} < 1
|
||||
EOQ
|
||||
EOQ
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
thresholds {
|
||||
critical = 1
|
||||
}
|
||||
|
||||
notify_no_data = true # Will notify when no data is received
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", "${var.status_extra_tags}"]
|
||||
thresholds = {
|
||||
critical = 1
|
||||
}
|
||||
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
require_full_window = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", var.status_extra_tags]
|
||||
}
|
||||
|
||||
|
||||
@ -1,29 +1,30 @@
|
||||
output "appservices_response_time_id" {
|
||||
description = "id for monitor appservices_response_time"
|
||||
value = "${datadog_monitor.appservices_response_time.*.id}"
|
||||
value = datadog_monitor.appservices_response_time.*.id
|
||||
}
|
||||
|
||||
output "appservices_memory_usage_count_id" {
|
||||
description = "id for monitor appservices_memory_usage_count"
|
||||
value = "${datadog_monitor.appservices_memory_usage_count.*.id}"
|
||||
value = datadog_monitor.appservices_memory_usage_count.*.id
|
||||
}
|
||||
|
||||
output "appservices_http_5xx_errors_count_id" {
|
||||
description = "id for monitor appservices_http_5xx_errors_count"
|
||||
value = "${datadog_monitor.appservices_http_5xx_errors_count.*.id}"
|
||||
value = datadog_monitor.appservices_http_5xx_errors_count.*.id
|
||||
}
|
||||
|
||||
output "appservices_http_4xx_errors_count_id" {
|
||||
description = "id for monitor appservices_http_4xx_errors_count"
|
||||
value = "${datadog_monitor.appservices_http_4xx_errors_count.*.id}"
|
||||
value = datadog_monitor.appservices_http_4xx_errors_count.*.id
|
||||
}
|
||||
|
||||
output "appservices_http_success_status_rate_id" {
|
||||
description = "id for monitor appservices_http_success_status_rate"
|
||||
value = "${datadog_monitor.appservices_http_success_status_rate.*.id}"
|
||||
value = datadog_monitor.appservices_http_success_status_rate.*.id
|
||||
}
|
||||
|
||||
output "appservices_status_id" {
|
||||
description = "id for monitor appservices_status"
|
||||
value = "${datadog_monitor.appservices_status.*.id}"
|
||||
value = datadog_monitor.appservices_status.*.id
|
||||
}
|
||||
|
||||
|
||||
4
cloud/azure/app-services/versions.tf
Normal file
4
cloud/azure/app-services/versions.tf
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
terraform {
|
||||
required_version = ">= 0.12"
|
||||
}
|
||||
@ -1,6 +1,6 @@
|
||||
variable "environment" {
|
||||
description = "Architecture environment"
|
||||
type = "string"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "filter_tags_use_defaults" {
|
||||
@ -41,31 +41,31 @@ variable "prefix_slug" {
|
||||
|
||||
variable "latency_enabled" {
|
||||
description = "Flag to enable Azure Search latency monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "latency_extra_tags" {
|
||||
description = "Extra tags for Azure Search latency monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "latency_message" {
|
||||
description = "Custom message for Azure Search latency monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "latency_time_aggregator" {
|
||||
description = "Monitor aggregator for Azure Search latency [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "latency_timeframe" {
|
||||
description = "Monitor timeframe for Azure Search latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -81,31 +81,31 @@ variable "latency_threshold_warning" {
|
||||
|
||||
variable "throttled_queries_rate_enabled" {
|
||||
description = "Flag to enable Azure Search throttled queries rate monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "throttled_queries_rate_extra_tags" {
|
||||
description = "Extra tags for Azure Search throttled queries rate monitor"
|
||||
type = "list"
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "throttled_queries_rate_message" {
|
||||
description = "Custom message for Azure Search throttled queries rate monitor"
|
||||
type = "string"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "throttled_queries_rate_time_aggregator" {
|
||||
description = "Monitor aggregator for Azure Search throttled queries rate [available values: min, max or avg]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "throttled_queries_rate_timeframe" {
|
||||
description = "Monitor timeframe for Azure Search throttled queries rate [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
type = string
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
@ -118,3 +118,4 @@ variable "throttled_queries_rate_threshold_warning" {
|
||||
default = 25
|
||||
description = "Warning threshold for Azure Search throttled queries rate"
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user