diff --git a/cloud/aws/alb/README.md b/cloud/aws/alb/README.md new file mode 100644 index 0000000..a6dcac0 --- /dev/null +++ b/cloud/aws/alb/README.md @@ -0,0 +1,66 @@ +AWS ALB DataDog monitors +========================================== + +How to use this module +---------------------- + +``` +module "datadog-monitors-aws-alb" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/alb?ref={revision}" + + environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +Purpose +------- +Creates DataDog monitors with the following checks: + +* ALB no healthy hosts +* ALB latency too high +* ALB http code 5xx percent too high +* ALB http code 4xx percent too high +* ALB target http code 5xx percent too high +* ALB target http code 4xx percent too high + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| alb_no_healthy_instances_message | Custom message for ALB no healthy instances monitor | string | `` | no | +| alb_no_healthy_instances_silenced | Groups to mute for ALB no healthy instances monitor | map | `` | no | +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| environment | Architecture environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| httpcode_elb_4xx_message | Custom message for ALB httpcode 4xx monitor | string | `` | no | +| httpcode_elb_4xx_silenced | Groups to mute for ALB httpcode 4xx monitor | map | `` | no | +| httpcode_elb_4xx_threshold_critical | loadbalancer 4xx critical threshold in percentage | string | `80` | no | +| httpcode_elb_4xx_threshold_warning | loadbalancer 4xx warning threshold in percentage | string | `60` | no | +| httpcode_elb_5xx_message | Custom message for ALB 
httpcode 5xx monitor | string | `` | no | +| httpcode_elb_5xx_silenced | Groups to mute for ALB httpcode 5xx monitor | map | `` | no | +| httpcode_elb_5xx_threshold_critical | loadbalancer 5xx critical threshold in percentage | string | `80` | no | +| httpcode_elb_5xx_threshold_warning | loadbalancer 5xx warning threshold in percentage | string | `60` | no | +| httpcode_target_4xx_message | Custom message for ALB target httpcode 4xx monitor | string | `` | no | +| httpcode_target_4xx_silenced | Groups to mute for ALB target httpcode 4xx monitor | map | `` | no | +| httpcode_target_4xx_threshold_critical | target 4xx critical threshold in percentage | string | `80` | no | +| httpcode_target_4xx_threshold_warning | target 4xx warning threshold in percentage | string | `60` | no | +| httpcode_target_5xx_message | Custom message for ALB target httpcode 5xx monitor | string | `` | no | +| httpcode_target_5xx_silenced | Groups to mute for ALB target httpcode 5xx monitor | map | `` | no | +| httpcode_target_5xx_threshold_critical | target 5xx critical threshold in percentage | string | `80` | no | +| httpcode_target_5xx_threshold_warning | target 5xx warning threshold in percentage | string | `60` | no | +| latency_message | Custom message for ALB latency monitor | string | `` | no | +| latency_silenced | Groups to mute for ALB latency monitor | map | `` | no | +| latency_threshold_critical | latency critical threshold in milliseconds | string | `1000` | no | +| latency_threshold_warning | latency warning threshold in milliseconds | string | `500` | no | +| message | Message sent when a monitor is triggered | string | - | yes | + +Related documentation +--------------------- + +DataDog blog: [https://www.datadoghq.com/blog/monitor-application-load-balancer/](https://www.datadoghq.com/blog/monitor-application-load-balancer/) + +AWS ALB metrics documentation: 
[https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-cloudwatch-metrics.html](https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-cloudwatch-metrics.html) diff --git a/cloud/aws/alb/inputs.tf b/cloud/aws/alb/inputs.tf new file mode 100644 index 0000000..e6d71dd --- /dev/null +++ b/cloud/aws/alb/inputs.tf @@ -0,0 +1,154 @@ +# Datadog global variables + +variable "environment" { + description = "Architecture environment" + type = "string" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +variable "message" { + description = "Message sent when a monitor is triggered" +} + +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +# Datadog monitors variables + +variable "alb_no_healthy_instances_silenced" { + description = "Groups to mute for ALB no healthy instances monitor" + type = "map" + default = {} +} + +variable "alb_no_healthy_instances_message" { + description = "Custom message for ALB no healthy instances monitor" + type = "string" + default = "" +} + +variable "latency_silenced" { + description = "Groups to mute for ALB latency monitor" + type = "map" + default = {} +} + +variable "latency_message" { + description = "Custom message for ALB latency monitor" + type = "string" + default = "" +} + +variable "latency_threshold_critical" { + default = 1000 + description = "latency critical threshold in milliseconds" +} + +variable "latency_threshold_warning" { + default = 500 + description = "latency warning threshold in milliseconds" +} + +variable "httpcode_elb_4xx_silenced" { + description = "Groups to mute for ALB httpcode 4xx monitor" + type = "map" + default = {} +} + +variable "httpcode_elb_4xx_message" { + description = "Custom message for 
ALB httpcode 4xx monitor" + type = "string" + default = "" +} + +variable "httpcode_elb_4xx_threshold_critical" { + default = 80 + description = "loadbalancer 4xx critical threshold in percentage" +} + +variable "httpcode_elb_4xx_threshold_warning" { + default = 60 + description = "loadbalancer 4xx warning threshold in percentage" +} + +variable "httpcode_target_4xx_silenced" { + description = "Groups to mute for ALB target httpcode 4xx monitor" + type = "map" + default = {} +} + +variable "httpcode_target_4xx_message" { + description = "Custom message for ALB target httpcode 4xx monitor" + type = "string" + default = "" +} + +variable "httpcode_target_4xx_threshold_critical" { + default = 80 + description = "target 4xx critical threshold in percentage" +} + +variable "httpcode_target_4xx_threshold_warning" { + default = 60 + description = "target 4xx warning threshold in percentage" +} + +variable "httpcode_elb_5xx_silenced" { + description = "Groups to mute for ALB httpcode 5xx monitor" + type = "map" + default = {} +} + +variable "httpcode_elb_5xx_message" { + description = "Custom message for ALB httpcode 5xx monitor" + type = "string" + default = "" +} + +variable "httpcode_elb_5xx_threshold_critical" { + default = 80 + description = "loadbalancer 5xx critical threshold in percentage" +} + +variable "httpcode_elb_5xx_threshold_warning" { + default = 60 + description = "loadbalancer 5xx warning threshold in percentage" +} + +variable "httpcode_target_5xx_silenced" { + description = "Groups to mute for ALB target httpcode 5xx monitor" + type = "map" + default = {} +} + +variable "httpcode_target_5xx_message" { + description = "Custom message for ALB target httpcode 5xx monitor" + type = "string" + default = "" +} + +variable "httpcode_target_5xx_threshold_critical" { + default = 80 + description = "target 5xx critical threshold in percentage" +} + +variable "httpcode_target_5xx_threshold_warning" { + default = 60 + description = "target 5xx warning threshold in 
percentage" +} + +variable "artificial_requests_count" { + default = 5 + description = "Number of false requests used to mitigate false positives in case of low traffic" +} diff --git a/cloud/aws/alb/monitors-alb.tf b/cloud/aws/alb/monitors-alb.tf new file mode 100644 index 0000000..e03c8d2 --- /dev/null +++ b/cloud/aws/alb/monitors-alb.tf @@ -0,0 +1,200 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.filter_tags_use_defaults == "true" ? + format("dd_monitoring:enabled,dd_aws_alb:enabled,env:%s", var.environment) : + "${var.filter_tags_custom}"}" + } +} + +resource "datadog_monitor" "ALB_no_healthy_instances" { + name = "[${var.environment}] ALB no healthy instances" + type = "metric alert" + message = "${coalesce(var.alb_no_healthy_instances_message, var.message)}" + + query = < ${var.latency_threshold_critical} + EOF + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + thresholds { + critical = "${var.latency_threshold_critical}" + warning = "${var.latency_threshold_warning}" + } + + notify_no_data = false + renotify_interval = 0 + require_full_window = false + timeout_h = 0 + include_tags = true + + silenced = "${var.latency_silenced}" + + tags = ["env:${var.environment}", "resource:alb", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "ALB_httpcode_elb_5xx" { + name = "[${var.environment}] ALB HTTP code 5xx {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + type = "metric alert" + message = "${coalesce(var.httpcode_elb_5xx_message, var.message)}" + + query = < ${var.httpcode_elb_5xx_threshold_critical} + EOF + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + thresholds { + critical = "${var.httpcode_elb_5xx_threshold_critical}" + warning = "${var.httpcode_elb_5xx_threshold_warning}" + } + + notify_no_data = false + renotify_interval = 0 + require_full_window = 
false + timeout_h = 0 + include_tags = true + + silenced = "${var.httpcode_elb_5xx_silenced}" + + tags = ["env:${var.environment}", "resource:alb", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "ALB_httpcode_elb_4xx" { + name = "[${var.environment}] ALB HTTP code 4xx {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + type = "metric alert" + message = "${coalesce(var.httpcode_elb_4xx_message, var.message)}" + + query = < ${var.httpcode_elb_4xx_threshold_critical} + EOF + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + thresholds { + critical = "${var.httpcode_elb_4xx_threshold_critical}" + warning = "${var.httpcode_elb_4xx_threshold_warning}" + } + + notify_no_data = false + renotify_interval = 0 + require_full_window = false + timeout_h = 0 + include_tags = true + + silenced = "${var.httpcode_elb_4xx_silenced}" + + tags = ["env:${var.environment}", "resource:alb", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "ALB_httpcode_target_5xx" { + name = "[${var.environment}] ALB target HTTP code 5xx {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + type = "metric alert" + message = "${coalesce(var.httpcode_target_5xx_message, var.message)}" + + query = < ${var.httpcode_target_5xx_threshold_critical} + EOF + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + thresholds { + critical = "${var.httpcode_target_5xx_threshold_critical}" + warning = "${var.httpcode_target_5xx_threshold_warning}" + } + + notify_no_data = false + renotify_interval = 0 + require_full_window = false + timeout_h = 0 + include_tags = true + + silenced = "${var.httpcode_target_5xx_silenced}" + + tags = ["env:${var.environment}", "resource:alb", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "ALB_httpcode_target_4xx" { + 
name = "[${var.environment}] ALB target HTTP code 4xx {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + type = "metric alert" + message = "${coalesce(var.httpcode_target_4xx_message, var.message)}" + + query = < ${var.httpcode_target_4xx_threshold_critical} + EOF + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + thresholds { + critical = "${var.httpcode_target_4xx_threshold_critical}" + warning = "${var.httpcode_target_4xx_threshold_warning}" + } + + notify_no_data = false + renotify_interval = 0 + require_full_window = false + timeout_h = 0 + include_tags = true + + silenced = "${var.httpcode_target_4xx_silenced}" + + tags = ["env:${var.environment}", "resource:alb", "team:aws", "provider:aws"] +}