diff --git a/cloud/aws/elb/README.md b/cloud/aws/elb/README.md index 645453d..2943b8a 100644 --- a/cloud/aws/elb/README.md +++ b/cloud/aws/elb/README.md @@ -20,8 +20,10 @@ Creates DataDog monitors with the following checks : * ELB no healthy hosts * ELB latency too high -* ELB http code 5xx percent to high -* ELB http code 4xx percent to high +* ELB http code 4xx percent too high +* ELB http code 5xx percent too high +* ELB backend http code 4xx percent too high +* ELB backend http code 5xx percent too high Inputs ------ @@ -33,11 +35,14 @@ Inputs | elb_4xx_threshold_warning | loadbalancer 4xx warning threshold in percentage | string | `5` | no | | elb_5xx_threshold_critical | loadbalancer 5xx critical threshold in percentage | string | `10` | no | | elb_5xx_threshold_warning | loadbalancer 5xx warning threshold in percentage | string | `5` | no | +| elb_backend_4xx_threshold_critical | loadbalancer backend 4xx critical threshold in percentage | string | `10` | no | +| elb_backend_4xx_threshold_warning | loadbalancer backend 4xx warning threshold in percentage | string | `5` | no | +| elb_backend_5xx_threshold_critical | loadbalancer backend 5xx critical threshold in percentage | string | `10` | no | +| elb_backend_5xx_threshold_warning | loadbalancer backend 5xx warning threshold in percentage | string | `5` | no | | elb_backend_latency_critical | latency critical threshold in seconds | string | `5` | no | | elb_backend_latency_warning | latency warning threshold in seconds | string | `1` | no | -| elb_notify_no_data | Use this variable to disable notify no data | string | `true` | no | | environment | Architecture Environment | string | - | yes | | evaluation_delay | Delay in seconds for the metric evaluation | string | `600` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| message | Message sent when an alert is triggered | string | - | yes 
| +| message | Message sent when an alert is triggered | string | - | yes | \ No newline at end of file diff --git a/cloud/aws/elb/inputs.tf b/cloud/aws/elb/inputs.tf index 3b9ab9e..f2056c8 100644 --- a/cloud/aws/elb/inputs.tf +++ b/cloud/aws/elb/inputs.tf @@ -29,9 +29,14 @@ variable "dd_aws_elb" { default = "disable" } -variable "elb_notify_no_data" { - description = "Use this variable to disable notify no data" - default = true +variable "elb_4xx_threshold_warning" { + description = "loadbalancer 4xx warning threshold in percentage" + default = 5 +} + +variable "elb_4xx_threshold_critical" { + description = "loadbalancer 4xx critical threshold in percentage" + default = 10 } variable "elb_5xx_threshold_warning" { @@ -44,13 +49,23 @@ variable "elb_5xx_threshold_critical" { default = 10 } -variable "elb_4xx_threshold_warning" { - description = "loadbalancer 4xx warning threshold in percentage" +variable "elb_backend_4xx_threshold_warning" { + description = "loadbalancer backend 4xx warning threshold in percentage" default = 5 } -variable "elb_4xx_threshold_critical" { - description = "loadbalancer 4xx critical threshold in percentage" +variable "elb_backend_4xx_threshold_critical" { + description = "loadbalancer backend 4xx critical threshold in percentage" + default = 10 +} + +variable "elb_backend_5xx_threshold_warning" { + description = "loadbalancer backend 5xx warning threshold in percentage" + default = 5 +} + +variable "elb_backend_5xx_threshold_critical" { + description = "loadbalancer backend 5xx critical threshold in percentage" default = 10 } diff --git a/cloud/aws/elb/monitors-elb.tf b/cloud/aws/elb/monitors-elb.tf index a8f04e8..835dc3e 100644 --- a/cloud/aws/elb/monitors-elb.tf +++ b/cloud/aws/elb/monitors-elb.tf @@ -7,97 +7,39 @@ data "template_file" "filter" { } resource "datadog_monitor" "ELB_no_healthy_instances" { - name = "[${var.environment}] ELB no healthy instances on {{host.identifier}}" + name = "[${var.environment}] ELB no healthy 
instances" message = "${var.message}" query = < 0" - EOF - - type = "metric alert" - - notify_no_data = "${var.elb_notify_no_data}" - evaluation_delay = "${var.evaluation_delay}" - renotify_interval = 0 - notify_audit = false - timeout_h = 0 - include_tags = true - locked = false - require_full_window = true - new_host_delay = "${var.evaluation_delay}" - no_data_timeframe = 20 - - tags = ["env:${var.environment}", "resource:elb", "team:aws", "provider:aws"] -} - -resource "datadog_monitor" "ELB_too_much_5xx_backend" { - name = "[${var.environment}] ELB too much 5xx backend err on {{host.identifier}}" - message = "${var.message}" - - query = < ${var.elb_5xx_threshold_critical}" - EOF - - type = "metric alert" - - thresholds { - warning = "${var.elb_5xx_threshold_warning}" - critical = "${var.elb_5xx_threshold_critical}" - } - - notify_no_data = "${var.elb_notify_no_data}" - evaluation_delay = "${var.evaluation_delay}" - renotify_interval = 0 - notify_audit = false - timeout_h = 0 - include_tags = true - locked = false - require_full_window = true - new_host_delay = "${var.evaluation_delay}" - no_data_timeframe = 20 - - tags = ["env:${var.environment}", "resource:elb", "team:aws", "provider:aws"] -} - -resource "datadog_monitor" "ELB_too_much_4xx_backend" { - name = "[${var.environment}] ELB too much 4xx backend err on {{host.identifier}}" - message = "${var.message}" - - query = < ${var.elb_4xx_threshold_critical}" EOF @@ -108,14 +50,110 @@ resource "datadog_monitor" "ELB_too_much_4xx_backend" { critical = "${var.elb_4xx_threshold_critical}" } - notify_no_data = "${var.elb_notify_no_data}" + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false - require_full_window = true + require_full_window = false + new_host_delay = "${var.evaluation_delay}" + no_data_timeframe = 20 + + tags = ["env:${var.environment}", "resource:elb", "team:aws", "provider:aws"] +} + 
+resource "datadog_monitor" "ELB_too_much_5xx" { + name = "[${var.environment}] ELB 5xx errors too high {{comparator}} {{#is_alert}}{{threshold}}%{{/is_alert}}{{#is_warning}}{{warn_threshold}}%{{/is_warning}} ({{value}}%)" + message = "${var.message}" + + query = < ${var.elb_5xx_threshold_critical}" + EOF + + type = "metric alert" + + thresholds { + warning = "${var.elb_5xx_threshold_warning}" + critical = "${var.elb_5xx_threshold_critical}" + } + + notify_no_data = true + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.evaluation_delay}" + no_data_timeframe = 20 + + tags = ["env:${var.environment}", "resource:elb", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "ELB_too_much_4xx_backend" { + name = "[${var.environment}] ELB backend 4xx errors too high {{comparator}} {{#is_alert}}{{threshold}}%{{/is_alert}}{{#is_warning}}{{warn_threshold}}%{{/is_warning}} ({{value}}%)" + message = "${var.message}" + + query = < ${var.elb_backend_4xx_threshold_critical}" + EOF + + type = "metric alert" + + thresholds { + warning = "${var.elb_backend_4xx_threshold_warning}" + critical = "${var.elb_backend_4xx_threshold_critical}" + } + + notify_no_data = true + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.evaluation_delay}" + no_data_timeframe = 20 + + tags = ["env:${var.environment}", "resource:elb", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "ELB_too_much_5xx_backend" { + name = "[${var.environment}] ELB backend 5xx errors too high {{comparator}} {{#is_alert}}{{threshold}}%{{/is_alert}}{{#is_warning}}{{warn_threshold}}%{{/is_warning}} ({{value}}%)" + message = "${var.message}" + + query = < ${var.elb_backend_5xx_threshold_critical}" + EOF + + 
type = "metric alert" + + thresholds { + warning = "${var.elb_backend_5xx_threshold_warning}" + critical = "${var.elb_backend_5xx_threshold_critical}" + } + + notify_no_data = true + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false new_host_delay = "${var.evaluation_delay}" no_data_timeframe = 20 @@ -123,12 +161,12 @@ resource "datadog_monitor" "ELB_too_much_4xx_backend" { } resource "datadog_monitor" "ELB_backend_latency" { - name = "[${var.environment}] ELB latency to high on {{host.identifier}}" + name = "[${var.environment}] ELB latency too high {{comparator}} {{#is_alert}}{{threshold}}s{{/is_alert}}{{#is_warning}}{{warn_threshold}}s{{/is_warning}} ({{value}}s)" message = "${var.message}" query = < ${var.elb_backend_latency_critical}}" EOF @@ -139,14 +177,14 @@ resource "datadog_monitor" "ELB_backend_latency" { critical = "${var.elb_backend_latency_critical}" } - notify_no_data = "${var.elb_notify_no_data}" + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false - require_full_window = true + require_full_window = false new_host_delay = "${var.evaluation_delay}" no_data_timeframe = 20