diff --git a/cloud/aws/alb/README.md b/cloud/aws/alb/README.md new file mode 100644 index 0000000..2a4dfed --- /dev/null +++ b/cloud/aws/alb/README.md @@ -0,0 +1,53 @@ +AWS ALB DataDog monitors +========================================== + +How to use this module +---------------------- + +``` +module "datadog-monitors-aws-alb" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/alb?ref={revision}" + + environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +Purpose +------- +Creates DataDog monitors with the following checks : + +* ALB no healthy hosts +* ALB latency too high +* ALB http code 5xx percent too high +* ALB http code 4xx percent too high +* ALB target http code 5xx percent too high +* ALB target http code 4xx percent too high + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| environment | Architecture environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| httpcode_elb_4xx_threshold_critical | loadbalancer 4xx critical threshold in percentage | string | `80` | no | +| httpcode_elb_4xx_threshold_warning | loadbalancer 4xx warning threshold in percentage | string | `60` | no | +| httpcode_elb_5xx_threshold_critical | loadbalancer 5xx critical threshold in percentage | string | `80` | no | +| httpcode_elb_5xx_threshold_warning | loadbalancer 5xx warning threshold in percentage | string | `60` | no | +| httpcode_target_4xx_threshold_critical | target 4xx critical threshold in percentage | string | `80` | no | +| httpcode_target_4xx_threshold_warning | target 4xx warning threshold in percentage | string | `60` | no | +| httpcode_target_5xx_threshold_critical | target 5xx critical threshold in percentage | string | `80` | no | +| httpcode_target_5xx_threshold_warning | 
target 5xx warning threshold in percentage | string | `60` | no | +| message | Message sent when a monitor is triggered | string | - | yes | +| latency_threshold_critical | latency critical threshold in milliseconds | string | `1000` | no | +| latency_threshold_warning | latency warning threshold in milliseconds | string | `500` | no | + +Related documentation +--------------------- + +DataDog blog: [https://www.datadoghq.com/blog/monitor-application-load-balancer/](https://www.datadoghq.com/blog/monitor-application-load-balancer/) + +AWS ALB metrics documentation: [https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-cloudwatch-metrics.html](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-cloudwatch-metrics.html) diff --git a/cloud/aws/alb/inputs.tf b/cloud/aws/alb/inputs.tf new file mode 100644 index 0000000..ce7e1b1 --- /dev/null +++ b/cloud/aws/alb/inputs.tf @@ -0,0 +1,78 @@ +# Datadog global variables + +variable "environment" { + description = "Architecture environment" + type = "string" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +variable "message" { + description = "Message sent when a monitor is triggered" +} + +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +# Datadog monitors variables + +variable "latency_threshold_critical" { + default = 1000 + description = "latency critical threshold in milliseconds" +} + +variable "latency_threshold_warning" { + default = 500 + description = "latency warning threshold in milliseconds" +} + +variable "httpcode_elb_4xx_threshold_critical" { + default = 80 + description = "loadbalancer 4xx critical threshold in percentage" +} + +variable "httpcode_elb_4xx_threshold_warning" { + default = 
60 + description = "loadbalancer 4xx warning threshold in percentage" +} + +variable "httpcode_target_4xx_threshold_critical" { + default = 80 + description = "target 4xx critical threshold in percentage" +} + +variable "httpcode_target_4xx_threshold_warning" { + default = 60 + description = "target 4xx warning threshold in percentage" +} + +variable "httpcode_elb_5xx_threshold_critical" { + default = 80 + description = "loadbalancer 5xx critical threshold in percentage" +} + +variable "httpcode_elb_5xx_threshold_warning" { + default = 60 + description = "loadbalancer 5xx warning threshold in percentage" +} + +variable "httpcode_target_5xx_threshold_critical" { + default = 80 + description = "target 5xx critical threshold in percentage" +} + +variable "httpcode_target_5xx_threshold_warning" { + default = 60 + description = "target 5xx warning threshold in percentage" +} + diff --git a/cloud/aws/alb/monitors-alb.tf b/cloud/aws/alb/monitors-alb.tf new file mode 100644 index 0000000..86e9f3a --- /dev/null +++ b/cloud/aws/alb/monitors-alb.tf @@ -0,0 +1,173 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.filter_tags_use_defaults == "true" ? 
format("dd_monitoring:enabled,dd_aws_alb:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + } +} + +resource "datadog_monitor" "ALB_no_healthy_instances" { + name = "[${var.environment}] ALB no healthy instances" + type = "metric alert" + message = "${var.message}" + query = < ${var.latency_threshold_critical} + EOF + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + thresholds { + critical = ${var.latency_threshold_critical} + warning = ${var.latency_threshold_warning} + } + + notify_no_data = true # Will notify when no data is received + renotify_interval = 0 + require_full_window = false + timeout_h = 0 + include_tags = true + + tags = ["env:${var.environment}", "resource:alb", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "ALB_httpcode_elb_5xx" { + name = "[${var.environment}] ALB HTTP code 5xx > ${var.httpcode_elb_5xx_threshold_critical} %" + type = "metric alert" + message = "${var.message}" + query = < ${var.httpcode_elb_5xx_threshold_critical} + EOF + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + thresholds { + critical = ${var.httpcode_elb_5xx_threshold_critical} + warning = ${var.httpcode_elb_5xx_threshold_warning} + } + + notify_no_data = false # Will notify when no data is received + renotify_interval = 0 + require_full_window = false + timeout_h = 1 + include_tags = true + + tags = ["env:${var.environment}", "resource:alb", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "ALB_httpcode_elb_4xx" { + name = "[${var.environment}] ALB HTTP code 4xx > ${var.httpcode_elb_4xx_threshold_critical} %" + type = "metric alert" + message = "${var.message}" + query = < ${var.httpcode_elb_4xx_threshold_critical} + EOF + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + thresholds { + critical = ${var.httpcode_elb_4xx_threshold_critical} + warning = ${var.httpcode_elb_4xx_threshold_warning} + } + + notify_no_data = false # Will notify when no data is 
received + renotify_interval = 0 + require_full_window = false + timeout_h = 1 + include_tags = true + + tags = ["env:${var.environment}", "resource:alb", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "ALB_httpcode_target_5xx" { + name = "[${var.environment}] ALB target HTTP code 5xx > ${var.httpcode_target_5xx_threshold_critical} %" + type = "metric alert" + message = "${var.message}" + query = < ${var.httpcode_target_5xx_threshold_critical} + EOF + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + thresholds { + critical = ${var.httpcode_target_5xx_threshold_critical} + warning = ${var.httpcode_target_5xx_threshold_warning} + } + + notify_no_data = false # Will notify when no data is received + renotify_interval = 0 + require_full_window = false + timeout_h = 1 + include_tags = true + + tags = ["env:${var.environment}", "resource:alb", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "ALB_httpcode_target_4xx" { + name = "[${var.environment}] ALB target HTTP code 4xx > ${var.httpcode_target_4xx_threshold_critical} %" + type = "metric alert" + message = "${var.message}" + query = < ${var.httpcode_target_4xx_threshold_critical} + EOF + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + thresholds { + critical = ${var.httpcode_target_4xx_threshold_critical} + warning = ${var.httpcode_target_4xx_threshold_warning} + } + + notify_no_data = false # Will notify when no data is received + renotify_interval = 0 + require_full_window = false + timeout_h = 1 + include_tags = true + + tags = ["env:${var.environment}", "resource:alb", "team:aws", "provider:aws"] +} +