MON-96 harmonize elb and alb and apigateway requests monitors

This commit is contained in:
Quentin Manfroi 2018-03-23 23:34:39 +01:00
parent ca9cdff481
commit c5d6e58737
6 changed files with 24 additions and 17 deletions

View File

@ -32,7 +32,7 @@ Inputs
|------|-------------|:----:|:-----:|:-----:| |------|-------------|:----:|:-----:|:-----:|
| alb_no_healthy_instances_message | Custom message for ALB no healthy instances monitor | string | `` | no | | alb_no_healthy_instances_message | Custom message for ALB no healthy instances monitor | string | `` | no |
| alb_no_healthy_instances_silenced | Groups to mute for ALB no healthy instances monitor | map | `<map>` | no | | alb_no_healthy_instances_silenced | Groups to mute for ALB no healthy instances monitor | map | `<map>` | no |
| artificial_requests_count | Number of false requests used to mitigate false positive in case of low traffic | string | `0` | no | | artificial_requests_count | Number of false requests used to mitigate false positive in case of low traffic | string | `5` | no |
| delay | Delay in seconds for the metric evaluation | string | `900` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no |
| environment | Architecture environment | string | - | yes | | environment | Architecture environment | string | - | yes |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |

View File

@ -73,7 +73,7 @@ resource "datadog_monitor" "ALB_httpcode_elb_5xx" {
message = "${coalesce(var.httpcode_elb_5xx_message, var.message)}" message = "${coalesce(var.httpcode_elb_5xx_message, var.message)}"
query = <<EOF query = <<EOF
sum(last_5m): ( min(last_5m): (
default( default(
avg:aws.applicationelb.httpcode_elb_5xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() / avg:aws.applicationelb.httpcode_elb_5xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
(avg:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}), (avg:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
@ -106,7 +106,7 @@ resource "datadog_monitor" "ALB_httpcode_elb_4xx" {
message = "${coalesce(var.httpcode_elb_4xx_message, var.message)}" message = "${coalesce(var.httpcode_elb_4xx_message, var.message)}"
query = <<EOF query = <<EOF
sum(last_5m): ( min(last_5m): (
default( default(
avg:aws.applicationelb.httpcode_elb_4xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() / avg:aws.applicationelb.httpcode_elb_4xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
(avg:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}), (avg:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
@ -139,7 +139,7 @@ resource "datadog_monitor" "ALB_httpcode_target_5xx" {
message = "${coalesce(var.httpcode_target_5xx_message, var.message)}" message = "${coalesce(var.httpcode_target_5xx_message, var.message)}"
query = <<EOF query = <<EOF
sum(last_5m): ( min(last_5m): (
default( default(
avg:aws.applicationelb.httpcode_target_5xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() / avg:aws.applicationelb.httpcode_target_5xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
(avg:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}), (avg:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
@ -172,7 +172,7 @@ resource "datadog_monitor" "ALB_httpcode_target_4xx" {
message = "${coalesce(var.httpcode_target_4xx_message, var.message)}" message = "${coalesce(var.httpcode_target_4xx_message, var.message)}"
query = <<EOF query = <<EOF
sum(last_5m): ( min(last_5m): (
default( default(
avg:aws.applicationelb.httpcode_target_4xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() / avg:aws.applicationelb.httpcode_target_4xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
(avg:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}), (avg:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),

View File

@ -36,7 +36,7 @@ resource "datadog_monitor" "API_http_5xx_errors_count" {
message = "${coalesce(var.http_5xx_requests_message, var.message)}" message = "${coalesce(var.http_5xx_requests_message, var.message)}"
query = <<EOF query = <<EOF
sum(last_5m): ( min(last_5m): (
default( default(
avg:aws.apigateway.5xxerror{${var.filter_tags}} by {region,apiname}.as_count() / avg:aws.apigateway.5xxerror{${var.filter_tags}} by {region,apiname}.as_count() /
(avg:aws.apigateway.count{${var.filter_tags}} by {region,apiname}.as_count() + ${var.artificial_requests_count}), (avg:aws.apigateway.count{${var.filter_tags}} by {region,apiname}.as_count() + ${var.artificial_requests_count}),
@ -70,7 +70,7 @@ resource "datadog_monitor" "API_http_4xx_errors_count" {
message = "${coalesce(var.http_4xx_requests_message, var.message)}" message = "${coalesce(var.http_4xx_requests_message, var.message)}"
query = <<EOF query = <<EOF
sum(last_5m): ( min(last_5m): (
default( default(
avg:aws.apigateway.4xxerror{${var.filter_tags}} by {region,apiname}.as_count() / avg:aws.apigateway.4xxerror{${var.filter_tags}} by {region,apiname}.as_count() /
(avg:aws.apigateway.count{${var.filter_tags}} by {region,apiname}.as_count() + ${var.artificial_requests_count}), (avg:aws.apigateway.count{${var.filter_tags}} by {region,apiname}.as_count() + ${var.artificial_requests_count}),

View File

@ -31,6 +31,7 @@ Inputs
| Name | Description | Type | Default | Required | | Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:| |------|-------------|:----:|:-----:|:-----:|
| dd_aws_elb | # ELB | string | `disable` | no | | dd_aws_elb | # ELB | string | `disable` | no |
| artificial_requests_count | Number of false requests used to mitigate false positive in case of low traffic | string | `5` | no |
| elb_4xx_message | Custom message for ELB 4xx errors monitor | string | `` | no | | elb_4xx_message | Custom message for ELB 4xx errors monitor | string | `` | no |
| elb_4xx_silenced | Groups to mute for ELB 4xx errors monitor | map | `<map>` | no | | elb_4xx_silenced | Groups to mute for ELB 4xx errors monitor | map | `<map>` | no |
| elb_4xx_threshold_critical | loadbalancer 4xx critical threshold in percentage | string | `10` | no | | elb_4xx_threshold_critical | loadbalancer 4xx critical threshold in percentage | string | `10` | no |

View File

@ -150,3 +150,9 @@ variable "elb_backend_latency_critical" {
description = "latency critical threshold in seconds" description = "latency critical threshold in seconds"
default = 5 default = 5
} }
# Constant added to the request-count denominator of the ELB error-rate
# monitor queries (see the `+ ${var.artificial_requests_count}` terms), so
# that a handful of errors on a nearly idle load balancer does not produce
# a high error percentage.
variable "artificial_requests_count" {
  description = "Number of false requests used to mitigate false positive in case of low trafic"
  default     = 5
}

View File

@ -39,10 +39,10 @@ resource "datadog_monitor" "ELB_too_much_4xx" {
message = "${coalesce(var.elb_4xx_message, var.message)}" message = "${coalesce(var.elb_4xx_message, var.message)}"
query = <<EOF query = <<EOF
avg(last_5m): ( min(last_5m): (
default( default(
avg:aws.elb.httpcode_elb_4xx{${data.template_file.filter.rendered}} by {region,loadbalancername} / avg:aws.elb.httpcode_elb_4xx{${data.template_file.filter.rendered}} by {region,loadbalancername}.as_count() /
avg:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername}, (avg:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername}.as_count() + ${var.artificial_requests_count}),
0) * 100 0) * 100
) > ${var.elb_4xx_threshold_critical} ) > ${var.elb_4xx_threshold_critical}
EOF EOF
@ -75,10 +75,10 @@ resource "datadog_monitor" "ELB_too_much_5xx" {
message = "${coalesce(var.elb_5xx_message, var.message)}" message = "${coalesce(var.elb_5xx_message, var.message)}"
query = <<EOF query = <<EOF
avg(last_5m): ( min(last_5m): (
default( default(
avg:aws.elb.httpcode_elb_5xx{${data.template_file.filter.rendered}} by {region,loadbalancername} / avg:aws.elb.httpcode_elb_5xx{${data.template_file.filter.rendered}} by {region,loadbalancername} /
avg:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername}, (avg:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername} + ${var.artificial_requests_count}),
0) * 100 0) * 100
) > ${var.elb_5xx_threshold_critical} ) > ${var.elb_5xx_threshold_critical}
EOF EOF
@ -111,10 +111,10 @@ resource "datadog_monitor" "ELB_too_much_4xx_backend" {
message = "${coalesce(var.elb_backend_4xx_message, var.message)}" message = "${coalesce(var.elb_backend_4xx_message, var.message)}"
query = <<EOF query = <<EOF
avg(last_5m): ( min(last_5m): (
default( default(
avg:aws.elb.httpcode_backend_4xx{${data.template_file.filter.rendered}} by {region,loadbalancername} / avg:aws.elb.httpcode_backend_4xx{${data.template_file.filter.rendered}} by {region,loadbalancername} /
avg:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername}, (avg:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername} + ${var.artificial_requests_count}),
0) * 100 0) * 100
) > ${var.elb_backend_4xx_threshold_critical} ) > ${var.elb_backend_4xx_threshold_critical}
EOF EOF
@ -147,10 +147,10 @@ resource "datadog_monitor" "ELB_too_much_5xx_backend" {
message = "${coalesce(var.elb_backend_5xx_message, var.message)}" message = "${coalesce(var.elb_backend_5xx_message, var.message)}"
query = <<EOF query = <<EOF
avg(last_5m): ( min(last_5m): (
default( default(
avg:aws.elb.httpcode_backend_5xx{${data.template_file.filter.rendered}} by {region,loadbalancername} / avg:aws.elb.httpcode_backend_5xx{${data.template_file.filter.rendered}} by {region,loadbalancername} /
avg:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername}, (avg:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername} + ${var.artificial_requests_count}),
0) * 100 0) * 100
) > ${var.elb_backend_5xx_threshold_critical} ) > ${var.elb_backend_5xx_threshold_critical}
EOF EOF