MON-304 add default function to latencies

2018-09-17 14:16:45 +02:00 · 2018-09-17 14:16:45 +02:00 · 07f2df9f49
commit 07f2df9f49
parent 368885cc24
3 changed files with 41 additions and 57 deletions
--- a/cloud/aws/alb/monitors-alb.tf
+++ b/cloud/aws/alb/monitors-alb.tf
@ -35,9 +35,9 @@ resource "datadog_monitor" "ALB_latency" {
  message = "${coalesce(var.latency_message, var.message)}"
  query = <<EOF
-    ${var.latency_time_aggregator}(${var.latency_timeframe}): (
+    ${var.latency_time_aggregator}(${var.latency_timeframe}):
-      avg:aws.applicationelb.target_response_time.average${module.filter-tags.query_alert} by {region,loadbalancer}
+      default(avg:aws.applicationelb.target_response_time.average${module.filter-tags.query_alert} by {region,loadbalancer}, 0)
-    ) > ${var.latency_threshold_critical}
+    > ${var.latency_threshold_critical}
  EOF
  evaluation_delay = "${var.evaluation_delay}"
@ -66,12 +66,10 @@ resource "datadog_monitor" "ALB_httpcode_5xx" {
  message = "${coalesce(var.httpcode_alb_5xx_message, var.message)}"
  query = <<EOF
-    sum(${var.httpcode_alb_5xx_timeframe}): (
+    sum(${var.httpcode_alb_5xx_timeframe}):
-      default(
+      default(avg:aws.applicationelb.httpcode_elb_5xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_count(), 0) / (
-        avg:aws.applicationelb.httpcode_alb_5xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() /
+      default(avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_count(), 0) + ${var.artificial_requests_count}),
-        (avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
+      * 100 > ${var.httpcode_alb_5xx_threshold_critical}
-      0) * 100
-    ) > ${var.httpcode_alb_5xx_threshold_critical}
  EOF
  evaluation_delay = "${var.evaluation_delay}"
@ -100,12 +98,10 @@ resource "datadog_monitor" "ALB_httpcode_4xx" {
  message = "${coalesce(var.httpcode_alb_4xx_message, var.message)}"
  query = <<EOF
-    sum(${var.httpcode_alb_4xx_timeframe}): (
+    sum(${var.httpcode_alb_4xx_timeframe}):
-      default(
+      default(avg:aws.applicationelb.httpcode_elb_4xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_count(), 0) / (
-        avg:aws.applicationelb.httpcode_alb_4xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() /
+      default(avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_count(), 0) + ${var.artificial_requests_count}),
-        (avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
+      * 100 > ${var.httpcode_alb_4xx_threshold_critical}
-      0) * 100
-    ) > ${var.httpcode_alb_4xx_threshold_critical}
  EOF
  evaluation_delay = "${var.evaluation_delay}"
@ -134,12 +130,10 @@ resource "datadog_monitor" "ALB_httpcode_target_5xx" {
  message = "${coalesce(var.httpcode_target_5xx_message, var.message)}"
  query = <<EOF
-    sum(${var.httpcode_target_5xx_timeframe}): (
+    sum(${var.httpcode_target_5xx_timeframe}):
-      default(
+      default(avg:aws.applicationelb.httpcode_target_5xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_count(), 0) / (
-        avg:aws.applicationelb.httpcode_target_5xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() /
+      default(avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_count(), 0) + ${var.artificial_requests_count}),
-        (avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
+      * 100 > ${var.httpcode_target_5xx_threshold_critical}
-      0) * 100
-    ) > ${var.httpcode_target_5xx_threshold_critical}
  EOF
  evaluation_delay = "${var.evaluation_delay}"
@ -168,12 +162,10 @@ resource "datadog_monitor" "ALB_httpcode_target_4xx" {
  message = "${coalesce(var.httpcode_target_4xx_message, var.message)}"
  query = <<EOF
-    sum(${var.httpcode_target_4xx_timeframe}): (
+    sum(${var.httpcode_target_4xx_timeframe}):
-      default(
+      default(avg:aws.applicationelb.httpcode_target_4xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_count(), 0) / (
-        avg:aws.applicationelb.httpcode_target_4xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() /
+      default(avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_count(), 0) + ${var.artificial_requests_count}),
-        (avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
+      * 100 > ${var.httpcode_target_4xx_threshold_critical}
-      0) * 100
-    ) > ${var.httpcode_target_4xx_threshold_critical}
  EOF
  evaluation_delay = "${var.evaluation_delay}"
--- a/cloud/aws/apigateway/monitors-api.tf
+++ b/cloud/aws/apigateway/monitors-api.tf
@ -6,9 +6,9 @@ resource "datadog_monitor" "API_Gateway_latency" {
  message = "${coalesce(var.latency_message, var.message)}"
  query = <<EOF
-    ${var.latency_time_aggregator}(${var.latency_timeframe}): (
+    ${var.latency_time_aggregator}(${var.latency_timeframe}):
-      avg:aws.apigateway.latency{${var.filter_tags}} by {region,apiname}
+      default(avg:aws.apigateway.latency{${var.filter_tags}} by {region,apiname})
-    ) > ${var.latency_threshold_critical}
+    > ${var.latency_threshold_critical}
  EOF
  evaluation_delay = "${var.evaluation_delay}"
--- a/cloud/aws/elb/monitors-elb.tf
+++ b/cloud/aws/elb/monitors-elb.tf
@ -32,12 +32,10 @@ resource "datadog_monitor" "ELB_too_much_4xx" {
  message = "${coalesce(var.elb_4xx_message, var.message)}"
  query = <<EOF
-    sum(${var.elb_4xx_timeframe}): (
+    sum(${var.elb_4xx_timeframe}):
-      default(
+      default(avg:aws.elb.httpcode_elb_4xx${module.filter-tags.query_alert} by {region,loadbalancername}.as_count(), 0) / (
-        avg:aws.elb.httpcode_elb_4xx${module.filter-tags.query_alert} by {region,loadbalancername}.as_count() /
+      default(avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername}.as_count(), 0) + ${var.artificial_requests_count}),
-        (avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername}.as_count() + ${var.artificial_requests_count}),
+      * 100 > ${var.elb_4xx_threshold_critical}
-      0) * 100
-    ) > ${var.elb_4xx_threshold_critical}
  EOF
  type = "metric alert"
@ -68,12 +66,10 @@ resource "datadog_monitor" "ELB_too_much_5xx" {
  message = "${coalesce(var.elb_5xx_message, var.message)}"
  query = <<EOF
-    sum(${var.elb_5xx_timeframe}): (
+    sum(${var.elb_5xx_timeframe}):
-      default(
+      default(avg:aws.elb.httpcode_elb_5xx${module.filter-tags.query_alert} by {region,loadbalancername}.as_count(), 0) / (
-        avg:aws.elb.httpcode_elb_5xx${module.filter-tags.query_alert} by {region,loadbalancername} /
+      default(avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername}.as_count(), 0) + ${var.artificial_requests_count}),
-        (avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername} + ${var.artificial_requests_count}),
+      * 100 > ${var.elb_5xx_threshold_critical}
-      0) * 100
-    ) > ${var.elb_5xx_threshold_critical}
  EOF
  type = "metric alert"
@ -104,12 +100,10 @@ resource "datadog_monitor" "ELB_too_much_4xx_backend" {
  message = "${coalesce(var.elb_backend_4xx_message, var.message)}"
  query = <<EOF
-    sum(${var.elb_backend_4xx_timeframe}): (
+    sum(${var.elb_backend_4xx_timeframe}):
-      default(
+      default(avg:aws.elb.httpcode_backend_4xx${module.filter-tags.query_alert} by {region,loadbalancername}.as_count(), 0) / (
-        avg:aws.elb.httpcode_backend_4xx${module.filter-tags.query_alert} by {region,loadbalancername} /
+      default(avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername}.as_count(), 0) + ${var.artificial_requests_count}),
-        (avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername} + ${var.artificial_requests_count}),
+      * 100 > ${var.elb_backend_4xx_threshold_critical}
-      0) * 100
-    ) > ${var.elb_backend_4xx_threshold_critical}
  EOF
  type = "metric alert"
@ -140,12 +134,10 @@ resource "datadog_monitor" "ELB_too_much_5xx_backend" {
  message = "${coalesce(var.elb_backend_5xx_message, var.message)}"
  query = <<EOF
-    sum(${var.elb_backend_5xx_timeframe}): (
+    sum(${var.elb_backend_5xx_timeframe}):
-      default(
+      default(avg:aws.elb.httpcode_backend_5xx${module.filter-tags.query_alert} by {region,loadbalancername}.as_count(), 0) / (
-        avg:aws.elb.httpcode_backend_5xx${module.filter-tags.query_alert} by {region,loadbalancername} /
+      default(avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername}.as_count(), 0) + ${var.artificial_requests_count}),
-        (avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername} + ${var.artificial_requests_count}),
+      * 100 > ${var.elb_backend_5xx_threshold_critical}
-      0) * 100
-    ) > ${var.elb_backend_5xx_threshold_critical}
  EOF
  type = "metric alert"
@ -176,9 +168,9 @@ resource "datadog_monitor" "ELB_backend_latency" {
  message = "${coalesce(var.elb_backend_latency_message, var.message)}"
  query = <<EOF
-    ${var.elb_backend_latency_time_aggregator}(${var.elb_backend_latency_timeframe}): (
+    ${var.elb_backend_latency_time_aggregator}(${var.elb_backend_latency_timeframe}):
-        avg:aws.elb.latency${module.filter-tags.query_alert} by {region,loadbalancername}
+      default(avg:aws.elb.latency${module.filter-tags.query_alert} by {region,loadbalancername}, 0)
-    ) > ${var.elb_backend_latency_critical}
+    > ${var.elb_backend_latency_critical}
  EOF
  type = "metric alert"