MON-227 Standardize filters and split delays

2018-08-13 16:39:46 +02:00 · 2018-08-13 16:39:46 +02:00 · fb93704b17
commit fb93704b17
parent 9d75a702e3
3 changed files with 26 additions and 48 deletions
--- a/cloud/gcp/lb/README.md
+++ b/cloud/gcp/lb/README.md
@ -33,7 +33,6 @@ Creates DataDog monitors with the following checks:
 | backend_latency_threshold_warning | Latency in seconds (warning threshold) | string | `2000` | no |
 | backend_latency_time_aggregator | Time aggregator for the GCP LB Backend Latency monitor | string | `min` | no |
 | backend_latency_timeframe | Timeframe for the GCP LB Backend Latency monitor | string | `last_10m` | no |
-| delay | Delay in seconds for the metric evaluation | string | `900` | no |
 | environment | Architecture environment | string | - | yes |
 | error_rate_4xx_artificial_request | Divisor Delta for the GCP LB 4XX Errors monitor | string | `5` | no |
 | error_rate_4xx_extra_tags | Extra tags for GCP LB 4XX Errors monitor | list | `<list>` | no |
@ -49,8 +48,8 @@ Creates DataDog monitors with the following checks:
 | error_rate_5xx_threshold_critical | Rate error in percentage (critical threshold) | string | `50` | no |
 | error_rate_5xx_time_aggregator | Time aggregator for the GCP LB 5XX Errors monitor | string | `sum` | no |
 | error_rate_5xx_timeframe | Timeframe for the GCP LB 5XX Errors monitor | string | `last_5m` | no |
-| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
-| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
+| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
+| filter_tags | Tags used for filtering | string | `*` | no |
 | latency_extra_tags | Extra tags for GCP LB Latency monitor | list | `<list>` | no |
 | latency_message | Custom message for the GCP LB Latency monitor | string | `` | no |
 | latency_silenced | Groups to mute for GCP LB Latency monitor | map | `<map>` | no |
@ -59,7 +58,7 @@ Creates DataDog monitors with the following checks:
 | latency_time_aggregator | Time aggregator for the GCP LB Latency monitor | string | `min` | no |
 | latency_timeframe | Timeframe for the GCP LB Latency monitor | string | `last_10m` | no |
 | message | Message sent when a monitor is triggered | string | - | yes |
-| project_id | ID of the GCP Project | string | - | yes |
+| new_host_delay | Delay in seconds for the new host evaluation | string | `300` | no |
 | request_count_extra_tags | Extra tags for GCP LB Request Count monitor | list | `<list>` | no |
 | request_count_message | Custom message for the GCP LB Request Count monitor | string | `` | no |
 | request_count_silenced | Groups to mute for GCP LB Request Count monitor | map | `<map>` | no |
--- a/cloud/gcp/lb/inputs.tf
+++ b/cloud/gcp/lb/inputs.tf
@ -6,13 +6,8 @@ variable "environment" {
  type        = "string"
 }

-variable "filter_tags_use_defaults" {
-  description = "Use default filter tags convention"
-  default     = "true"
-}
-
-variable "filter_tags_custom" {
-  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
+variable "filter_tags" {
+  description = "Tags used for filtering"
  default     = "*"
 }

@ -20,17 +15,14 @@ variable "message" {
  description = "Message sent when a monitor is triggered"
 }

-variable "delay" {
+variable "evaluation_delay" {
  description = "Delay in seconds for the metric evaluation"
  default     = 900
 }

-#
-# Filter variables
-#
-variable "project_id" {
-  type        = "string"
-  description = "ID of the GCP Project"
+variable "new_host_delay" {
+  description = "Delay in seconds for the new host evaluation"
+  default     = 300
 }

 #
--- a/cloud/gcp/lb/monitors-lb.tf
+++ b/cloud/gcp/lb/monitors-lb.tf
@ -1,16 +1,3 @@
-#
-# FILTER
-#
-data "template_file" "filter" {
-  template = "$${filter}"
-
-  vars {
-    filter = "${var.filter_tags_use_defaults == "true" ?
-              format("project_id:%s", var.project_id) :
-             "${var.filter_tags_custom}"}"
-  }
-}
-
 #
 # 4XX Errors
 #
@ -22,9 +9,9 @@ resource "datadog_monitor" "error_rate_4xx" {

  query = <<EOF
  ${var.error_rate_4xx_time_aggregator}(${var.error_rate_4xx_timeframe}):
-    avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered},response_code_class:400} by {backend_target_name}.as_count().fill(zero)
+    avg:gcp.loadbalancing.https.request_count{${var.filter_tags},response_code_class:400} by {backend_target_name}.as_count().fill(zero)
    /
-    (avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered}} by {backend_target_name}.as_count().fill(zero) + ${var.error_rate_4xx_artificial_request} ) * 100
+    (avg:gcp.loadbalancing.https.request_count{${var.filter_tags}} by {backend_target_name}.as_count().fill(zero) + ${var.error_rate_4xx_artificial_request} ) * 100
  > ${var.error_rate_4xx_threshold_critical}
 EOF

@ -40,8 +27,8 @@ EOF
  notify_no_data      = false
  renotify_interval   = 0

-  evaluation_delay = "${var.delay}"
-  new_host_delay   = "${var.delay}"
+  evaluation_delay = "${var.evaluation_delay}"
+  new_host_delay   = "${var.new_host_delay}"

  silenced = "${var.error_rate_4xx_silenced}"

@ -66,9 +53,9 @@ resource "datadog_monitor" "error_rate_5xx" {

  query = <<EOF
  ${var.error_rate_5xx_time_aggregator}(${var.error_rate_5xx_timeframe}):
-    avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered},response_code_class:400} by {backend_target_name}.as_count().fill(zero)
+    avg:gcp.loadbalancing.https.request_count{${var.filter_tags},response_code_class:500} by {backend_target_name}.as_count().fill(zero)
    /
-    (avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered}} by {backend_target_name}.as_count().fill(zero) + ${var.error_rate_5xx_artificial_request} ) * 100
+    (avg:gcp.loadbalancing.https.request_count{${var.filter_tags}} by {backend_target_name}.as_count().fill(zero) + ${var.error_rate_5xx_artificial_request} ) * 100
  > ${var.error_rate_5xx_threshold_critical}
 EOF

@ -84,8 +71,8 @@ EOF
  notify_no_data      = false
  renotify_interval   = 0

-  evaluation_delay = "${var.delay}"
-  new_host_delay   = "${var.delay}"
+  evaluation_delay = "${var.evaluation_delay}"
+  new_host_delay   = "${var.new_host_delay}"

  silenced = "${var.error_rate_5xx_silenced}"

@ -110,7 +97,7 @@ resource "datadog_monitor" "latency" {

  query = <<EOF
  ${var.latency_time_aggregator}(${var.latency_timeframe}):
-    min:gcp.loadbalancing.https.total_latencies.avg{${data.template_file.filter.rendered}} by {backend_target_name}
+    min:gcp.loadbalancing.https.total_latencies.avg{${var.filter_tags}} by {backend_target_name}
  > ${var.latency_threshold_critical}
 EOF

@ -127,8 +114,8 @@ EOF
  notify_no_data      = false
  renotify_interval   = 0

-  evaluation_delay = "${var.delay}"
-  new_host_delay   = "${var.delay}"
+  evaluation_delay = "${var.evaluation_delay}"
+  new_host_delay   = "${var.new_host_delay}"

  silenced = "${var.latency_silenced}"

@ -153,7 +140,7 @@ resource "datadog_monitor" "backend_latency" {

  query = <<EOF
  ${var.backend_latency_time_aggregator}(${var.backend_latency_timeframe}):
-    min:gcp.loadbalancing.https.backend_latencies.avg{${data.template_file.filter.rendered}} by {backend_target_name}
+    min:gcp.loadbalancing.https.backend_latencies.avg{${var.filter_tags}} by {backend_target_name}
  > ${var.backend_latency_threshold_critical}
 EOF

@ -170,8 +157,8 @@ EOF
  notify_no_data      = false
  renotify_interval   = 0

-  evaluation_delay = "${var.delay}"
-  new_host_delay   = "${var.delay}"
+  evaluation_delay = "${var.evaluation_delay}"
+  new_host_delay   = "${var.new_host_delay}"

  silenced = "${var.backend_latency_silenced}"

@ -196,7 +183,7 @@ resource "datadog_monitor" "request_count" {

  query = <<EOF
  pct_change(${var.request_count_time_aggregator}(${var.request_count_timeframe}),${var.request_count_timeshift}):
-    avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered}} by {backend_target_name}.as_count().fill(zero)
+    avg:gcp.loadbalancing.https.request_count{${var.filter_tags}} by {backend_target_name}.as_count().fill(zero)
  > ${var.request_count_threshold_critical}
 EOF

@ -213,8 +200,8 @@ EOF
  notify_no_data      = false
  renotify_interval   = 0

-  evaluation_delay = "${var.delay}"
-  new_host_delay   = "${var.delay}"
+  evaluation_delay = "${var.evaluation_delay}"
+  new_host_delay   = "${var.new_host_delay}"

  silenced = "${var.request_count_silenced}"