From 4af00f8ed076a5c6b5f4f6929c8c3be487635c91 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 5 Jul 2018 16:06:35 +0200 Subject: [PATCH 01/11] MON-227 Basic README --- cloud/gcp/lb/README.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 cloud/gcp/lb/README.md diff --git a/cloud/gcp/lb/README.md b/cloud/gcp/lb/README.md new file mode 100644 index 0000000..c1ea99b --- /dev/null +++ b/cloud/gcp/lb/README.md @@ -0,0 +1,28 @@ +How to use this module +---------------------- + +``` +module "datadog-monitors-gcp-memorystore" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/memorystore?ref={revision}" + + project_id = "${var.gcp_project_id}" + environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +Purpose +------- +Creates DataDog monitors with the following checks : + +* + +Inputs +------ + +Related documentation +------------ + +* [GCP LB Metrics](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-loadbalancing) +* [Datadog GCP integration](https://docs.datadoghq.com/integrations/google_cloud_platform/) From 19402713c5fc3e63db51c67ab2950db59857152a Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Wed, 1 Aug 2018 12:38:10 +0200 Subject: [PATCH 02/11] MON-227 First version of the monitors --- README.md | 1 + cloud/gcp/lb/README.md | 81 ++++++++++-- cloud/gcp/lb/inputs.tf | 253 ++++++++++++++++++++++++++++++++++++ cloud/gcp/lb/monitors-lb.tf | 229 ++++++++++++++++++++++++++++++++ cloud/gcp/lb/outputs.tf | 24 ++++ 5 files changed, 575 insertions(+), 13 deletions(-) create mode 100644 cloud/gcp/lb/inputs.tf create mode 100644 cloud/gcp/lb/monitors-lb.tf create mode 100644 cloud/gcp/lb/outputs.tf diff --git a/README.md b/README.md index 1cc513d..a3a0282 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,7 @@ The `//` is very important, it's a terraform specific syntax used to separate gi - 
[cloud-sql](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/) - [common](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/common/) - [mysql](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/mysql/) + - [lb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/lb/) - [pubsub](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/pubsub/) - [common](https://bitbucket.org/morea/terraform.feature.datadog/src/master/common/) - [alerting-message](https://bitbucket.org/morea/terraform.feature.datadog/src/master/common/alerting-message/) diff --git a/cloud/gcp/lb/README.md b/cloud/gcp/lb/README.md index c1ea99b..e2aaaa0 100644 --- a/cloud/gcp/lb/README.md +++ b/cloud/gcp/lb/README.md @@ -1,28 +1,83 @@ -How to use this module ----------------------- +# CLOUD GCP LB DataDog monitors + +## How to use this module ``` -module "datadog-monitors-gcp-memorystore" { - source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/memorystore?ref={revision}" +module "datadog-monitors-cloud-gcp-lb" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/lb?ref={revision}" - project_id = "${var.gcp_project_id}" environment = "${var.environment}" message = "${module.datadog-message-alerting.alerting-message}" } ``` -Purpose -------- -Creates DataDog monitors with the following checks : +## Purpose -* +Creates DataDog monitors with the following checks: -Inputs ------- +- GCP LB 4xx errors +- GCP LB 5xx errors +- GCP LB latency +- GCP LB backend latency +- GCP LB Requests count increased abruptly -Related documentation ------------- +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| backend_latency_extra_tags | Extra tags for GCP LB Backend Latency monitor | list | `` | no | +| backend_latency_message | Custom message 
for the GCP LB Backend Latency monitor | string | `` | no | +| backend_latency_silenced | Groups to mute for GCP LB Backend Latency monitor | map | `` | no | +| backend_latency_threshold_critical | Latency in seconds (critical threshold) | string | `4000` | no | +| backend_latency_threshold_warning | Latency in seconds (warning threshold) | string | `2000` | no | +| backend_latency_time_aggregator | Timeframe for the GCP LB Backend Latency monitor | string | `min` | no | +| backend_latency_timeframe | Timeframe for the GCP LB Backend Latency monitor | string | `last_10m` | no | +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| environment | Architecture environment | string | - | yes | +| error_rate_4xx_extra_tags | Extra tags for GCP LB 4XX Errors monitor | list | `` | no | +| error_rate_4xx_message | Custom message for the GCP LB 4XX Errors monitor | string | `` | no | +| error_rate_4xx_silenced | Groups to mute for GCP LB 4XX Errors monitor | map | `` | no | +| error_rate_4xx_threshold_critical | Rate error in percentage (critical threshold) | string | `50` | no | +| error_rate_4xx_time_aggregator | Timeframe for the GCP LB 4XX Errors monitor | string | `sum` | no | +| error_rate_4xx_timeframe | Timeframe for the GCP LB 4XX Errors monitor | string | `last_5m` | no | +| error_rate_5xx_extra_tags | Extra tags for GCP LB 5XX Errors monitor | list | `` | no | +| error_rate_5xx_message | Custom message for the GCP LB 5XX Errors monitor | string | `` | no | +| error_rate_5xx_silenced | Groups to mute for GCP LB 5XX Errors monitor | map | `` | no | +| error_rate_5xx_threshold_critical | Rate error in percentage (critical threshold) | string | `50` | no | +| error_rate_5xx_time_aggregator | Timeframe for the GCP LB 5XX Errors monitor | string | `sum` | no | +| error_rate_5xx_timeframe | Timeframe for the GCP LB 5XX Errors monitor | string | `last_5m` | no | +| filter_tags_custom | Tags used for custom filtering when 
filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| latency_extra_tags | Extra tags for GCP LB Latency monitor | list | `` | no | +| latency_message | Custom message for the GCP LB Latency monitor | string | `` | no | +| latency_silenced | Groups to mute for GCP LB Latency monitor | map | `` | no | +| latency_threshold_critical | Latency in seconds (critical threshold) | string | `5000` | no | +| latency_threshold_warning | Latency in seconds (warning threshold) | string | `3000` | no | +| latency_time_aggregator | Timeframe for the GCP LB Latency monitor | string | `min` | no | +| latency_timeframe | Timeframe for the GCP LB Latency monitor | string | `last_10m` | no | +| message | Message sent when a monitor is triggered | string | - | yes | +| project_id | ID of the GCP Project | string | - | yes | +| request_count_extra_tags | Extra tags for GCP LB Request Count monitor | list | `` | no | +| request_count_message | Custom message for the GCP LB Request Count monitor | string | `` | no | +| request_count_silenced | Groups to mute for GCP LB Request Count monitor | map | `` | no | +| request_count_threshold_critical | Desviation in percentage (critical threshold) | string | `500` | no | +| request_count_threshold_warning | Desviation in percentage (warning threshold) | string | `250` | no | +| request_count_time_aggregator | Timeframe for the GCP LB Request Count monitor | string | `sum` | no | +| request_count_timeframe | Timeframe for the GCP LB Request Count monitor | string | `last_5m` | no | +| request_count_timeshift | Timeshift for the GCP LB Request Count monitor | string | `last_5m` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| backend_latency_id | id for monitor backend_latency | +| error_rate_4xx_id | id for monitor error_rate_4xx | +| error_rate_5xx_id | id for monitor error_rate_5xx | +| latency_id | id for monitor latency | +| 
request_count_id | id for monitor request_count | + +## Related documentation * [GCP LB Metrics](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-loadbalancing) * [Datadog GCP integration](https://docs.datadoghq.com/integrations/google_cloud_platform/) diff --git a/cloud/gcp/lb/inputs.tf b/cloud/gcp/lb/inputs.tf new file mode 100644 index 0000000..8581b67 --- /dev/null +++ b/cloud/gcp/lb/inputs.tf @@ -0,0 +1,253 @@ +# +# Datadog global variables +# +variable "environment" { + description = "Architecture environment" + type = "string" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +variable "message" { + description = "Message sent when a monitor is triggered" +} + +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +# +# Filter variables +# +variable "project_id" { + type = "string" + description = "ID of the GCP Project" +} + +# +# 4XX Errors +# +variable "error_rate_4xx_message" { + description = "Custom message for the GCP LB 4XX Errors monitor" + type = "string" + default = "" +} + +variable "error_rate_4xx_time_aggregator" { + description = "Timeframe for the GCP LB 4XX Errors monitor" + type = "string" + default = "sum" +} + +variable "error_rate_4xx_timeframe" { + description = "Timeframe for the GCP LB 4XX Errors monitor" + type = "string" + default = "last_5m" +} + +variable "error_rate_4xx_threshold_critical" { + description = "Rate error in percentage (critical threshold)" + type = "string" + default = 50 +} + +variable "error_rate_4xx_silenced" { + description = "Groups to mute for GCP LB 4XX Errors monitor" + type = "map" + default = {} +} + +variable "error_rate_4xx_extra_tags" { + description = "Extra tags for GCP LB 4XX Errors monitor" + type = "list" + default = [] +} + +# +# 5XX 
Errors +# +variable "error_rate_5xx_message" { + description = "Custom message for the GCP LB 5XX Errors monitor" + type = "string" + default = "" +} + +variable "error_rate_5xx_time_aggregator" { + description = "Timeframe for the GCP LB 5XX Errors monitor" + type = "string" + default = "sum" +} + +variable "error_rate_5xx_timeframe" { + description = "Timeframe for the GCP LB 5XX Errors monitor" + type = "string" + default = "last_5m" +} + +variable "error_rate_5xx_threshold_critical" { + description = "Rate error in percentage (critical threshold)" + type = "string" + default = 50 +} + +variable "error_rate_5xx_silenced" { + description = "Groups to mute for GCP LB 5XX Errors monitor" + type = "map" + default = {} +} + +variable "error_rate_5xx_extra_tags" { + description = "Extra tags for GCP LB 5XX Errors monitor" + type = "list" + default = [] +} + +# +# Latency +# +variable "latency_message" { + description = "Custom message for the GCP LB Latency monitor" + type = "string" + default = "" +} + +variable "latency_time_aggregator" { + description = "Timeframe for the GCP LB Latency monitor" + type = "string" + default = "min" +} + +variable "latency_timeframe" { + description = "Timeframe for the GCP LB Latency monitor" + type = "string" + default = "last_10m" +} + +variable "latency_threshold_warning" { + description = "Latency in seconds (warning threshold)" + type = "string" + default = 3000 +} + +variable "latency_threshold_critical" { + description = "Latency in seconds (critical threshold)" + type = "string" + default = 5000 +} + +variable "latency_silenced" { + description = "Groups to mute for GCP LB Latency monitor" + type = "map" + default = {} +} + +variable "latency_extra_tags" { + description = "Extra tags for GCP LB Latency monitor" + type = "list" + default = [] +} + +# +# Latency Backend +# +variable "backend_latency_message" { + description = "Custom message for the GCP LB Backend Latency monitor" + type = "string" + default = "" +} + 
+variable "backend_latency_time_aggregator" { + description = "Timeframe for the GCP LB Backend Latency monitor" + type = "string" + default = "min" +} + +variable "backend_latency_timeframe" { + description = "Timeframe for the GCP LB Backend Latency monitor" + type = "string" + default = "last_10m" +} + +variable "backend_latency_threshold_warning" { + description = "Latency in seconds (warning threshold)" + type = "string" + default = 2000 +} + +variable "backend_latency_threshold_critical" { + description = "Latency in seconds (critical threshold)" + type = "string" + default = 4000 +} + +variable "backend_latency_silenced" { + description = "Groups to mute for GCP LB Backend Latency monitor" + type = "map" + default = {} +} + +variable "backend_latency_extra_tags" { + description = "Extra tags for GCP LB Backend Latency monitor" + type = "list" + default = [] +} + +# +# Request Count +# +variable "request_count_message" { + description = "Custom message for the GCP LB Request Count monitor" + type = "string" + default = "" +} + +variable "request_count_time_aggregator" { + description = "Timeframe for the GCP LB Request Count monitor" + type = "string" + default = "sum" +} + +variable "request_count_timeframe" { + description = "Timeframe for the GCP LB Request Count monitor" + type = "string" + default = "last_5m" +} + +variable "request_count_timeshift" { + description = "Timeshift for the GCP LB Request Count monitor" + type = "string" + default = "last_5m" +} + +variable "request_count_threshold_warning" { + description = "Desviation in percentage (warning threshold)" + type = "string" + default = 250 +} + +variable "request_count_threshold_critical" { + description = "Desviation in percentage (critical threshold)" + type = "string" + default = 500 +} + +variable "request_count_silenced" { + description = "Groups to mute for GCP LB Request Count monitor" + type = "map" + default = {} +} + +variable "request_count_extra_tags" { + description = "Extra tags 
for GCP LB Request Count monitor" + type = "list" + default = [] +} diff --git a/cloud/gcp/lb/monitors-lb.tf b/cloud/gcp/lb/monitors-lb.tf new file mode 100644 index 0000000..b16fb6b --- /dev/null +++ b/cloud/gcp/lb/monitors-lb.tf @@ -0,0 +1,229 @@ +# +# FILTER +# +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.filter_tags_use_defaults == "true" ? + format("project_id:%s", var.project_id) : + "${var.filter_tags_custom}"}" + } +} + +# +# 4XX Errors +# +resource "datadog_monitor" "error_rate_4xx" { + name = "[${var.environment}] GCP LB 4xx errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.error_rate_4xx_message, var.message)}" + + type = "metric alert" + + query = < ${var.error_rate_4xx_threshold_critical} +EOF + + thresholds { + critical = "${var.error_rate_4xx_threshold_critical}" + } + + notify_audit = false + locked = false + timeout_h = 0 + include_tags = true + require_full_window = false + notify_no_data = false + renotify_interval = 0 + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + silenced = "${var.error_rate_4xx_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "resource:lb", + "env:${var.environment}", + "created_by:terraform", + "${var.error_rate_4xx_extra_tags}", + ] +} + +# +# 5XX Errors +# +resource "datadog_monitor" "error_rate_5xx" { + name = "[${var.environment}] GCP LB 5xx errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.error_rate_5xx_message, var.message)}" + + type = "metric alert" + + query = < ${var.error_rate_5xx_threshold_critical} +EOF + + thresholds { + critical = "${var.error_rate_5xx_threshold_critical}" + } + + notify_audit = false + locked = false + timeout_h = 0 + include_tags = true + 
require_full_window = false + notify_no_data = false + renotify_interval = 0 + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + silenced = "${var.error_rate_5xx_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "resource:lb", + "env:${var.environment}", + "created_by:terraform", + "${var.error_rate_5xx_extra_tags}", + ] +} + +# +# Latency +# +resource "datadog_monitor" "latency" { + name = "[${var.environment}] GCP LB latency {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" + message = "${coalesce(var.latency_message, var.message)}" + + type = "query alert" + + query = < ${var.latency_threshold_critical} +EOF + + thresholds { + warning = "${var.latency_threshold_warning}" + critical = "${var.latency_threshold_critical}" + } + + notify_audit = false + locked = false + timeout_h = 0 + include_tags = true + require_full_window = false + notify_no_data = false + renotify_interval = 0 + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + silenced = "${var.latency_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "resource:lb", + "env:${var.environment}", + "created_by:terraform", + "${var.latency_extra_tags}", + ] +} + +# +# Backend Latency +# +resource "datadog_monitor" "backend_latency" { + name = "[${var.environment}] GCP LB backend latency {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" + message = "${coalesce(var.backend_latency_message, var.message)}" + + type = "metric alert" + + query = < ${var.backend_latency_threshold_critical} +EOF + + thresholds { + warning = "${var.backend_latency_threshold_warning}" + critical = "${var.backend_latency_threshold_critical}" + } + + notify_audit = false + locked = false + timeout_h = 0 + include_tags = true + require_full_window = false + notify_no_data = false + 
renotify_interval = 0 + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + silenced = "${var.backend_latency_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "resource:lb", + "env:${var.environment}", + "created_by:terraform", + "${var.backend_latency_extra_tags}", + ] +} + +# +# Request Count +# +resource "datadog_monitor" "request_count" { + name = "[${var.environment}] GCP LB Requests count increased abruptly" + message = "${coalesce(var.request_count_message, var.message)}" + + type = "query alert" + + query = < ${var.request_count_threshold_critical} +EOF + + thresholds { + warning = "${var.request_count_threshold_warning}" + critical = "${var.request_count_threshold_critical}" + } + + notify_audit = false + locked = false + timeout_h = 0 + include_tags = true + require_full_window = false + notify_no_data = false + renotify_interval = 0 + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + silenced = "${var.request_count_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "resource:lb", + "env:${var.environment}", + "created_by:terraform", + "${var.request_count_extra_tags}", + ] +} diff --git a/cloud/gcp/lb/outputs.tf b/cloud/gcp/lb/outputs.tf new file mode 100644 index 0000000..f1d368e --- /dev/null +++ b/cloud/gcp/lb/outputs.tf @@ -0,0 +1,24 @@ +output "error_rate_4xx_id" { + description = "id for monitor error_rate_4xx" + value = "${datadog_monitor.error_rate_4xx.id}" +} + +output "error_rate_5xx_id" { + description = "id for monitor error_rate_5xx" + value = "${datadog_monitor.error_rate_5xx.id}" +} + +output "latency_id" { + description = "id for monitor latency" + value = "${datadog_monitor.latency.id}" +} + +output "backend_latency_id" { + description = "id for monitor backend_latency" + value = "${datadog_monitor.backend_latency.id}" +} + +output "request_count_id" { + description = "id for monitor request_count" + value = "${datadog_monitor.request_count.id}" +} From 
9d75a702e334b3c71ab3e10f3b9b3e5c333744f1 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Fri, 10 Aug 2018 17:39:40 +0200 Subject: [PATCH 03/11] MON-227 Fixes on title and variable creation for some monitors --- cloud/gcp/lb/README.md | 2 ++ cloud/gcp/lb/inputs.tf | 12 ++++++++++++ cloud/gcp/lb/monitors-lb.tf | 6 +++--- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/cloud/gcp/lb/README.md b/cloud/gcp/lb/README.md index e2aaaa0..8a987f0 100644 --- a/cloud/gcp/lb/README.md +++ b/cloud/gcp/lb/README.md @@ -35,12 +35,14 @@ Creates DataDog monitors with the following checks: | backend_latency_timeframe | Timeframe for the GCP LB Backend Latency monitor | string | `last_10m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | +| error_rate_4xx_artificial_request | Divisor Delta for the GCP LB 4XX Errors monitor | string | `5` | no | | error_rate_4xx_extra_tags | Extra tags for GCP LB 4XX Errors monitor | list | `` | no | | error_rate_4xx_message | Custom message for the GCP LB 4XX Errors monitor | string | `` | no | | error_rate_4xx_silenced | Groups to mute for GCP LB 4XX Errors monitor | map | `` | no | | error_rate_4xx_threshold_critical | Rate error in percentage (critical threshold) | string | `50` | no | | error_rate_4xx_time_aggregator | Timeframe for the GCP LB 4XX Errors monitor | string | `sum` | no | | error_rate_4xx_timeframe | Timeframe for the GCP LB 4XX Errors monitor | string | `last_5m` | no | +| error_rate_5xx_artificial_request | Divisor Delta for the GCP LB 5XX Errors monitor | string | `5` | no | | error_rate_5xx_extra_tags | Extra tags for GCP LB 5XX Errors monitor | list | `` | no | | error_rate_5xx_message | Custom message for the GCP LB 5XX Errors monitor | string | `` | no | | error_rate_5xx_silenced | Groups to mute for GCP LB 5XX Errors monitor | map | `` | no | diff --git a/cloud/gcp/lb/inputs.tf b/cloud/gcp/lb/inputs.tf 
index 8581b67..88d8f0f 100644 --- a/cloud/gcp/lb/inputs.tf +++ b/cloud/gcp/lb/inputs.tf @@ -54,6 +54,12 @@ variable "error_rate_4xx_timeframe" { default = "last_5m" } +variable "error_rate_4xx_artificial_request" { + description = "Divisor Delta for the GCP LB 4XX Errors monitor" + type = "string" + default = 5 +} + variable "error_rate_4xx_threshold_critical" { description = "Rate error in percentage (critical threshold)" type = "string" @@ -93,6 +99,12 @@ variable "error_rate_5xx_timeframe" { default = "last_5m" } +variable "error_rate_5xx_artificial_request" { + description = "Divisor Delta for the GCP LB 5XX Errors monitor" + type = "string" + default = 5 +} + variable "error_rate_5xx_threshold_critical" { description = "Rate error in percentage (critical threshold)" type = "string" diff --git a/cloud/gcp/lb/monitors-lb.tf b/cloud/gcp/lb/monitors-lb.tf index b16fb6b..a2601d3 100644 --- a/cloud/gcp/lb/monitors-lb.tf +++ b/cloud/gcp/lb/monitors-lb.tf @@ -24,7 +24,7 @@ resource "datadog_monitor" "error_rate_4xx" { ${var.error_rate_4xx_time_aggregator}(${var.error_rate_4xx_timeframe}): avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered},response_code_class:400} by {backend_target_name}.as_count().fill(zero) / - (avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered}} by {backend_target_name}.as_count().fill(zero) + 5 ) * 100 + (avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered}} by {backend_target_name}.as_count().fill(zero) + ${var.error_rate_4xx_artificial_request} ) * 100 > ${var.error_rate_4xx_threshold_critical} EOF @@ -68,7 +68,7 @@ resource "datadog_monitor" "error_rate_5xx" { ${var.error_rate_5xx_time_aggregator}(${var.error_rate_5xx_timeframe}): avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered},response_code_class:400} by {backend_target_name}.as_count().fill(zero) / - 
(avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered}} by {backend_target_name}.as_count().fill(zero) + 5 ) * 100 + (avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered}} by {backend_target_name}.as_count().fill(zero) + ${var.error_rate_5xx_artificial_request} ) * 100 > ${var.error_rate_5xx_threshold_critical} EOF @@ -189,7 +189,7 @@ EOF # Request Count # resource "datadog_monitor" "request_count" { - name = "[${var.environment}] GCP LB Requests count increased abruptly" + name = "[${var.environment}] GCP LB Requests count increased abruptly {{#is_alert}}{{value}}%{{/is_alert}}{{#is_warning}}{{value}}%{{/is_warning}}" message = "${coalesce(var.request_count_message, var.message)}" type = "query alert" From fb93704b17f8b64060503a40b2938ca1e9a9a719 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 13 Aug 2018 16:39:46 +0200 Subject: [PATCH 04/11] MON-227 Standardize filters and split delays --- cloud/gcp/lb/README.md | 7 +++--- cloud/gcp/lb/inputs.tf | 20 +++++----------- cloud/gcp/lb/monitors-lb.tf | 47 ++++++++++++++----------------------- 3 files changed, 26 insertions(+), 48 deletions(-) diff --git a/cloud/gcp/lb/README.md b/cloud/gcp/lb/README.md index 8a987f0..9551d90 100644 --- a/cloud/gcp/lb/README.md +++ b/cloud/gcp/lb/README.md @@ -33,7 +33,6 @@ Creates DataDog monitors with the following checks: | backend_latency_threshold_warning | Latency in seconds (warning threshold) | string | `2000` | no | | backend_latency_time_aggregator | Timeframe for the GCP LB Backend Latency monitor | string | `min` | no | | backend_latency_timeframe | Timeframe for the GCP LB Backend Latency monitor | string | `last_10m` | no | -| delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | | error_rate_4xx_artificial_request | Divisor Delta for the GCP LB 4XX Errors monitor | string | `5` | no | | error_rate_4xx_extra_tags | Extra 
tags for GCP LB 4XX Errors monitor | list | `` | no | @@ -49,8 +48,8 @@ Creates DataDog monitors with the following checks: | error_rate_5xx_threshold_critical | Rate error in percentage (critical threshold) | string | `50` | no | | error_rate_5xx_time_aggregator | Timeframe for the GCP LB 5XX Errors monitor | string | `sum` | no | | error_rate_5xx_timeframe | Timeframe for the GCP LB 5XX Errors monitor | string | `last_5m` | no | -| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | -| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | +| filter_tags | Tags used for filtering | string | `*` | no | | latency_extra_tags | Extra tags for GCP LB Latency monitor | list | `` | no | | latency_message | Custom message for the GCP LB Latency monitor | string | `` | no | | latency_silenced | Groups to mute for GCP LB Latency monitor | map | `` | no | @@ -59,7 +58,7 @@ Creates DataDog monitors with the following checks: | latency_time_aggregator | Timeframe for the GCP LB Latency monitor | string | `min` | no | | latency_timeframe | Timeframe for the GCP LB Latency monitor | string | `last_10m` | no | | message | Message sent when a monitor is triggered | string | - | yes | -| project_id | ID of the GCP Project | string | - | yes | +| new_host_delay | Delay in seconds for the new host evaluation | string | `300` | no | | request_count_extra_tags | Extra tags for GCP LB Request Count monitor | list | `` | no | | request_count_message | Custom message for the GCP LB Request Count monitor | string | `` | no | | request_count_silenced | Groups to mute for GCP LB Request Count monitor | map | `` | no | diff --git a/cloud/gcp/lb/inputs.tf b/cloud/gcp/lb/inputs.tf index 88d8f0f..3c3d758 100644 --- a/cloud/gcp/lb/inputs.tf +++ b/cloud/gcp/lb/inputs.tf @@ -6,13 +6,8 @@ variable "environment" { type = 
"string" } -variable "filter_tags_use_defaults" { - description = "Use default filter tags convention" - default = "true" -} - -variable "filter_tags_custom" { - description = "Tags used for custom filtering when filter_tags_use_defaults is false" +variable "filter_tags" { + description = "Tags used for filtering" default = "*" } @@ -20,17 +15,14 @@ variable "message" { description = "Message sent when a monitor is triggered" } -variable "delay" { +variable "evaluation_delay" { description = "Delay in seconds for the metric evaluation" default = 900 } -# -# Filter variables -# -variable "project_id" { - type = "string" - description = "ID of the GCP Project" +variable "new_host_delay" { + description = "Delay in seconds for the new host evaluation" + default = 300 } # diff --git a/cloud/gcp/lb/monitors-lb.tf b/cloud/gcp/lb/monitors-lb.tf index a2601d3..e306d20 100644 --- a/cloud/gcp/lb/monitors-lb.tf +++ b/cloud/gcp/lb/monitors-lb.tf @@ -1,16 +1,3 @@ -# -# FILTER -# -data "template_file" "filter" { - template = "$${filter}" - - vars { - filter = "${var.filter_tags_use_defaults == "true" ? 
- format("project_id:%s", var.project_id) : - "${var.filter_tags_custom}"}" - } -} - # # 4XX Errors # @@ -22,9 +9,9 @@ resource "datadog_monitor" "error_rate_4xx" { query = < ${var.error_rate_4xx_threshold_critical} EOF @@ -40,8 +27,8 @@ EOF notify_no_data = false renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.error_rate_4xx_silenced}" @@ -66,9 +53,9 @@ resource "datadog_monitor" "error_rate_5xx" { query = < ${var.error_rate_5xx_threshold_critical} EOF @@ -84,8 +71,8 @@ EOF notify_no_data = false renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.error_rate_5xx_silenced}" @@ -110,7 +97,7 @@ resource "datadog_monitor" "latency" { query = < ${var.latency_threshold_critical} EOF @@ -127,8 +114,8 @@ EOF notify_no_data = false renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.latency_silenced}" @@ -153,7 +140,7 @@ resource "datadog_monitor" "backend_latency" { query = < ${var.backend_latency_threshold_critical} EOF @@ -170,8 +157,8 @@ EOF notify_no_data = false renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.backend_latency_silenced}" @@ -196,7 +183,7 @@ resource "datadog_monitor" "request_count" { query = < ${var.request_count_threshold_critical} EOF @@ -213,8 +200,8 @@ EOF notify_no_data = false renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" 
silenced = "${var.request_count_silenced}" From bbdd700b97d8d53c188e9e2dca8c3b9dfeb0278a Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 20 Aug 2018 17:24:02 +0200 Subject: [PATCH 05/11] MON-227 Fix error on the filter for the monitor 5XX --- cloud/gcp/lb/monitors-lb.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/gcp/lb/monitors-lb.tf b/cloud/gcp/lb/monitors-lb.tf index e306d20..46210c7 100644 --- a/cloud/gcp/lb/monitors-lb.tf +++ b/cloud/gcp/lb/monitors-lb.tf @@ -53,7 +53,7 @@ resource "datadog_monitor" "error_rate_5xx" { query = < ${var.error_rate_5xx_threshold_critical} From 6de13602a3c65698952331accb47dc89c8bbd365 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Tue, 21 Aug 2018 16:11:41 +0200 Subject: [PATCH 06/11] MON-227 Tags migrated to the new standard --- cloud/gcp/lb/monitors-lb.tf | 45 +++++-------------------------------- 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/cloud/gcp/lb/monitors-lb.tf b/cloud/gcp/lb/monitors-lb.tf index 46210c7..80fbf05 100644 --- a/cloud/gcp/lb/monitors-lb.tf +++ b/cloud/gcp/lb/monitors-lb.tf @@ -32,14 +32,7 @@ EOF silenced = "${var.error_rate_4xx_silenced}" - tags = [ - "team:gcp", - "provider:gcp", - "resource:lb", - "env:${var.environment}", - "created_by:terraform", - "${var.error_rate_4xx_extra_tags}", - ] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:lb", "team:claranet", "created-by:terraform", "${var.error_rate_4xx_extra_tags}"] } # @@ -76,14 +69,7 @@ EOF silenced = "${var.error_rate_5xx_silenced}" - tags = [ - "team:gcp", - "provider:gcp", - "resource:lb", - "env:${var.environment}", - "created_by:terraform", - "${var.error_rate_5xx_extra_tags}", - ] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:lb", "team:claranet", "created-by:terraform", "${var.error_rate_5xx_extra_tags}"] } # @@ -119,14 +105,7 @@ EOF silenced = "${var.latency_silenced}" - tags = [ - "team:gcp", - "provider:gcp", - 
"resource:lb", - "env:${var.environment}", - "created_by:terraform", - "${var.latency_extra_tags}", - ] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:lb", "team:claranet", "created-by:terraform", "${var.latency_extra_tags}"] } # @@ -162,14 +141,7 @@ EOF silenced = "${var.backend_latency_silenced}" - tags = [ - "team:gcp", - "provider:gcp", - "resource:lb", - "env:${var.environment}", - "created_by:terraform", - "${var.backend_latency_extra_tags}", - ] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:lb", "team:claranet", "created-by:terraform", "${var.backend_latency_extra_tags}"] } # @@ -205,12 +177,5 @@ EOF silenced = "${var.request_count_silenced}" - tags = [ - "team:gcp", - "provider:gcp", - "resource:lb", - "env:${var.environment}", - "created_by:terraform", - "${var.request_count_extra_tags}", - ] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:lb", "team:claranet", "created-by:terraform", "${var.request_count_extra_tags}"] } From 3da267ee7e65adb3a21ba021096f096190e1e858 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Fri, 24 Aug 2018 13:01:28 +0200 Subject: [PATCH 07/11] MON-227 change backend_target_name by forwarding_rule_name for grouping --- cloud/gcp/lb/monitors-lb.tf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cloud/gcp/lb/monitors-lb.tf b/cloud/gcp/lb/monitors-lb.tf index 80fbf05..8a56726 100644 --- a/cloud/gcp/lb/monitors-lb.tf +++ b/cloud/gcp/lb/monitors-lb.tf @@ -9,9 +9,9 @@ resource "datadog_monitor" "error_rate_4xx" { query = < ${var.error_rate_4xx_threshold_critical} EOF @@ -46,9 +46,9 @@ resource "datadog_monitor" "error_rate_5xx" { query = < ${var.error_rate_5xx_threshold_critical} EOF @@ -83,7 +83,7 @@ resource "datadog_monitor" "latency" { query = < ${var.latency_threshold_critical} EOF @@ -119,7 +119,7 @@ resource "datadog_monitor" "backend_latency" { query = < ${var.backend_latency_threshold_critical} EOF @@ -155,7 
+155,7 @@ resource "datadog_monitor" "request_count" { query = < ${var.request_count_threshold_critical} EOF From 5fdcc12a2934240efefc005340ed0d70cb7a6dec Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Mon, 27 Aug 2018 09:50:54 +0200 Subject: [PATCH 08/11] MON-227 replace fill by default function --- cloud/gcp/lb/monitors-lb.tf | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cloud/gcp/lb/monitors-lb.tf b/cloud/gcp/lb/monitors-lb.tf index 8a56726..3d34cb4 100644 --- a/cloud/gcp/lb/monitors-lb.tf +++ b/cloud/gcp/lb/monitors-lb.tf @@ -9,9 +9,9 @@ resource "datadog_monitor" "error_rate_4xx" { query = < ${var.error_rate_4xx_threshold_critical} EOF @@ -45,11 +45,11 @@ resource "datadog_monitor" "error_rate_5xx" { type = "metric alert" query = < ${var.error_rate_5xx_threshold_critical} + ${var.error_rate_5xx_time_aggregator}(${var.error_rate_5xx_timeframe}): + default(sum:gcp.loadbalancing.https.request_count{${var.filter_tags},response_code_class:500} by {forwarding_rule_name}.as_count(), 0) + / (default(sum:gcp.loadbalancing.https.request_count{${var.filter_tags}} by {forwarding_rule_name}.as_count(), 0) + + ${var.error_rate_5xx_artificial_request}) * 100 + > ${var.error_rate_5xx_threshold_critical} EOF thresholds { @@ -155,7 +155,7 @@ resource "datadog_monitor" "request_count" { query = < ${var.request_count_threshold_critical} EOF From bc7211ba5f5eed33576c7496d0a7c93cbe2cfc33 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 30 Aug 2018 15:34:15 +0200 Subject: [PATCH 09/11] MON-227 delete total latencies and split backend latency into bucket and service --- cloud/gcp/lb/inputs.tf | 110 ++++++++++++++++++------------------ cloud/gcp/lb/monitors-lb.tf | 94 +++++++++++++++--------------- 2 files changed, 102 insertions(+), 102 deletions(-) diff --git a/cloud/gcp/lb/inputs.tf b/cloud/gcp/lb/inputs.tf index 3c3d758..8644ce9 100644 --- a/cloud/gcp/lb/inputs.tf +++ b/cloud/gcp/lb/inputs.tf @@ -116,90 +116,90 @@ variable
"error_rate_5xx_extra_tags" { } # -# Latency +# Latency Backend service # -variable "latency_message" { - description = "Custom message for the GCP LB Latency monitor" - type = "string" - default = "" -} - -variable "latency_time_aggregator" { - description = "Timeframe for the GCP LB Latency monitor" - type = "string" - default = "min" -} - -variable "latency_timeframe" { - description = "Timeframe for the GCP LB Latency monitor" - type = "string" - default = "last_10m" -} - -variable "latency_threshold_warning" { - description = "Latency in seconds (warning threshold)" - type = "string" - default = 3000 -} - -variable "latency_threshold_critical" { - description = "Latency in seconds (critical threshold)" - type = "string" - default = 5000 -} - -variable "latency_silenced" { - description = "Groups to mute for GCP LB Latency monitor" - type = "map" - default = {} -} - -variable "latency_extra_tags" { - description = "Extra tags for GCP LB Latency monitor" - type = "list" - default = [] -} - -# -# Latency Backend -# -variable "backend_latency_message" { +variable "backend_latency_service_message" { description = "Custom message for the GCP LB Backend Latency monitor" type = "string" default = "" } -variable "backend_latency_time_aggregator" { +variable "backend_latency_service_time_aggregator" { description = "Timeframe for the GCP LB Backend Latency monitor" type = "string" default = "min" } -variable "backend_latency_timeframe" { +variable "backend_latency_service_timeframe" { description = "Timeframe for the GCP LB Backend Latency monitor" type = "string" default = "last_10m" } -variable "backend_latency_threshold_warning" { +variable "backend_latency_service_threshold_warning" { description = "Latency in seconds (warning threshold)" type = "string" - default = 2000 + default = 1000 } -variable "backend_latency_threshold_critical" { +variable "backend_latency_service_threshold_critical" { description = "Latency in seconds (critical threshold)" type = "string" - 
default = 4000 + default = 1500 } -variable "backend_latency_silenced" { +variable "backend_latency_service_silenced" { description = "Groups to mute for GCP LB Backend Latency monitor" type = "map" default = {} } -variable "backend_latency_extra_tags" { +variable "backend_latency_service_extra_tags" { + description = "Extra tags for GCP LB Backend Latency monitor" + type = "list" + default = [] +} + +# +# Latency Backend bucket +# +variable "backend_latency_bucket_message" { + description = "Custom message for the GCP LB Backend Latency monitor" + type = "string" + default = "" +} + +variable "backend_latency_bucket_time_aggregator" { + description = "Timeframe for the GCP LB Backend Latency monitor" + type = "string" + default = "min" +} + +variable "backend_latency_bucket_timeframe" { + description = "Timeframe for the GCP LB Backend Latency monitor" + type = "string" + default = "last_10m" +} + +variable "backend_latency_bucket_threshold_warning" { + description = "Latency in seconds (warning threshold)" + type = "string" + default = 4000 +} + +variable "backend_latency_bucket_threshold_critical" { + description = "Latency in seconds (critical threshold)" + type = "string" + default = 8000 +} + +variable "backend_latency_bucket_silenced" { + description = "Groups to mute for GCP LB Backend Latency monitor" + type = "map" + default = {} +} + +variable "backend_latency_bucket_extra_tags" { description = "Extra tags for GCP LB Backend Latency monitor" type = "list" default = [] diff --git a/cloud/gcp/lb/monitors-lb.tf b/cloud/gcp/lb/monitors-lb.tf index 3d34cb4..141e229 100644 --- a/cloud/gcp/lb/monitors-lb.tf +++ b/cloud/gcp/lb/monitors-lb.tf @@ -73,59 +73,23 @@ EOF } # -# Latency +# Backend Latency for service # -resource "datadog_monitor" "latency" { - name = "[${var.environment}] GCP LB latency {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" - message = 
"${coalesce(var.latency_message, var.message)}" - - type = "query alert" - - query = < ${var.latency_threshold_critical} -EOF - - thresholds { - warning = "${var.latency_threshold_warning}" - critical = "${var.latency_threshold_critical}" - } - - notify_audit = false - locked = false - timeout_h = 0 - include_tags = true - require_full_window = false - notify_no_data = false - renotify_interval = 0 - - evaluation_delay = "${var.evaluation_delay}" - new_host_delay = "${var.new_host_delay}" - - silenced = "${var.latency_silenced}" - - tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:lb", "team:claranet", "created-by:terraform", "${var.latency_extra_tags}"] -} - -# -# Backend Latency -# -resource "datadog_monitor" "backend_latency" { - name = "[${var.environment}] GCP LB backend latency {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" - message = "${coalesce(var.backend_latency_message, var.message)}" +resource "datadog_monitor" "backend_latency_service" { + name = "[${var.environment}] GCP LB service backend latency {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" + message = "${coalesce(var.backend_latency_service_message, var.message)}" type = "metric alert" query = < ${var.backend_latency_threshold_critical} + ${var.backend_latency_service_time_aggregator}(${var.backend_latency_service_timeframe}): + min:gcp.loadbalancing.https.backend_latencies.avg{${var.filter_tags},backend_target_type:backend_service} by {backend_target_name,forwarding_rule_name} + > ${var.backend_latency_service_threshold_critical} EOF thresholds { - warning = "${var.backend_latency_threshold_warning}" - critical = "${var.backend_latency_threshold_critical}" + warning = "${var.backend_latency_service_threshold_warning}" + critical = 
"${var.backend_latency_service_threshold_critical}" } notify_audit = false @@ -139,9 +103,45 @@ EOF evaluation_delay = "${var.evaluation_delay}" new_host_delay = "${var.new_host_delay}" - silenced = "${var.backend_latency_silenced}" + silenced = "${var.backend_latency_service_silenced}" - tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:lb", "team:claranet", "created-by:terraform", "${var.backend_latency_extra_tags}"] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:lb", "team:claranet", "created-by:terraform", "${var.backend_latency_service_extra_tags}"] +} + +# +# Backend Latency for bucket +# +resource "datadog_monitor" "backend_latency_bucket" { + name = "[${var.environment}] GCP LB bucket backend latency {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" + message = "${coalesce(var.backend_latency_bucket_message, var.message)}" + + type = "metric alert" + + query = < ${var.backend_latency_bucket_threshold_critical} +EOF + + thresholds { + warning = "${var.backend_latency_bucket_threshold_warning}" + critical = "${var.backend_latency_bucket_threshold_critical}" + } + + notify_audit = false + locked = false + timeout_h = 0 + include_tags = true + require_full_window = false + notify_no_data = false + renotify_interval = 0 + + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" + + silenced = "${var.backend_latency_bucket_silenced}" + + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:lb", "team:claranet", "created-by:terraform", "${var.backend_latency_bucket_extra_tags}"] } # From 00ba4ae3ad26f9f1aa54a75365fef70a8e9f69eb Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 30 Aug 2018 15:34:46 +0200 Subject: [PATCH 10/11] MON-227 auto update --- cloud/gcp/lb/README.md | 36 ++++++++++++++++++------------------ cloud/gcp/lb/outputs.tf | 18 
+++++++++--------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/cloud/gcp/lb/README.md b/cloud/gcp/lb/README.md index 9551d90..05aa2d0 100644 --- a/cloud/gcp/lb/README.md +++ b/cloud/gcp/lb/README.md @@ -18,21 +18,28 @@ Creates DataDog monitors with the following checks: - GCP LB 4xx errors - GCP LB 5xx errors -- GCP LB latency -- GCP LB backend latency +- GCP LB bucket backend latency - GCP LB Requests count increased abruptly +- GCP LB service backend latency ## Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| backend_latency_extra_tags | Extra tags for GCP LB Backend Latency monitor | list | `` | no | -| backend_latency_message | Custom message for the GCP LB Backend Latency monitor | string | `` | no | -| backend_latency_silenced | Groups to mute for GCP LB Backend Latency monitor | map | `` | no | -| backend_latency_threshold_critical | Latency in seconds (critical threshold) | string | `4000` | no | -| backend_latency_threshold_warning | Latency in seconds (warning threshold) | string | `2000` | no | -| backend_latency_time_aggregator | Timeframe for the GCP LB Backend Latency monitor | string | `min` | no | -| backend_latency_timeframe | Timeframe for the GCP LB Backend Latency monitor | string | `last_10m` | no | +| backend_latency_bucket_extra_tags | Extra tags for GCP LB Backend Latency monitor | list | `` | no | +| backend_latency_bucket_message | Custom message for the GCP LB Backend Latency monitor | string | `` | no | +| backend_latency_bucket_silenced | Groups to mute for GCP LB Backend Latency monitor | map | `` | no | +| backend_latency_bucket_threshold_critical | Latency in seconds (critical threshold) | string | `8000` | no | +| backend_latency_bucket_threshold_warning | Latency in seconds (warning threshold) | string | `4000` | no | +| backend_latency_bucket_time_aggregator | Timeframe for the GCP LB Backend Latency monitor | string | `min` | no | +| 
backend_latency_bucket_timeframe | Timeframe for the GCP LB Backend Latency monitor | string | `last_10m` | no | +| backend_latency_service_extra_tags | Extra tags for GCP LB Backend Latency monitor | list | `` | no | +| backend_latency_service_message | Custom message for the GCP LB Backend Latency monitor | string | `` | no | +| backend_latency_service_silenced | Groups to mute for GCP LB Backend Latency monitor | map | `` | no | +| backend_latency_service_threshold_critical | Latency in seconds (critical threshold) | string | `1500` | no | +| backend_latency_service_threshold_warning | Latency in seconds (warning threshold) | string | `1000` | no | +| backend_latency_service_time_aggregator | Timeframe for the GCP LB Backend Latency monitor | string | `min` | no | +| backend_latency_service_timeframe | Timeframe for the GCP LB Backend Latency monitor | string | `last_10m` | no | | environment | Architecture environment | string | - | yes | | error_rate_4xx_artificial_request | Divisor Delta for the GCP LB 4XX Errors monitor | string | `5` | no | | error_rate_4xx_extra_tags | Extra tags for GCP LB 4XX Errors monitor | list | `` | no | @@ -50,13 +57,6 @@ Creates DataDog monitors with the following checks: | error_rate_5xx_timeframe | Timeframe for the GCP LB 5XX Errors monitor | string | `last_5m` | no | | evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | | filter_tags | Tags used for filtering | string | `*` | no | -| latency_extra_tags | Extra tags for GCP LB Latency monitor | list | `` | no | -| latency_message | Custom message for the GCP LB Latency monitor | string | `` | no | -| latency_silenced | Groups to mute for GCP LB Latency monitor | map | `` | no | -| latency_threshold_critical | Latency in seconds (critical threshold) | string | `5000` | no | -| latency_threshold_warning | Latency in seconds (warning threshold) | string | `3000` | no | -| latency_time_aggregator | Timeframe for the GCP LB Latency monitor | string 
| `min` | no | -| latency_timeframe | Timeframe for the GCP LB Latency monitor | string | `last_10m` | no | | message | Message sent when a monitor is triggered | string | - | yes | | new_host_delay | Delay in seconds for the new host evaluation | string | `300` | no | | request_count_extra_tags | Extra tags for GCP LB Request Count monitor | list | `` | no | @@ -72,10 +72,10 @@ Creates DataDog monitors with the following checks: | Name | Description | |------|-------------| -| backend_latency_id | id for monitor backend_latency | +| backend_latency_bucket_id | id for monitor backend_latency_bucket | +| backend_latency_service_id | id for monitor backend_latency_service | | error_rate_4xx_id | id for monitor error_rate_4xx | | error_rate_5xx_id | id for monitor error_rate_5xx | -| latency_id | id for monitor latency | | request_count_id | id for monitor request_count | ## Related documentation diff --git a/cloud/gcp/lb/outputs.tf b/cloud/gcp/lb/outputs.tf index f1d368e..4718769 100644 --- a/cloud/gcp/lb/outputs.tf +++ b/cloud/gcp/lb/outputs.tf @@ -1,24 +1,24 @@ output "error_rate_4xx_id" { description = "id for monitor error_rate_4xx" - value = "${datadog_monitor.error_rate_4xx.id}" + value = "${datadog_monitor.error_rate_4xx.*.id}" } output "error_rate_5xx_id" { description = "id for monitor error_rate_5xx" - value = "${datadog_monitor.error_rate_5xx.id}" + value = "${datadog_monitor.error_rate_5xx.*.id}" } -output "latency_id" { - description = "id for monitor latency" - value = "${datadog_monitor.latency.id}" +output "backend_latency_service_id" { + description = "id for monitor backend_latency_service" + value = "${datadog_monitor.backend_latency_service.*.id}" } -output "backend_latency_id" { - description = "id for monitor backend_latency" - value = "${datadog_monitor.backend_latency.id}" +output "backend_latency_bucket_id" { + description = "id for monitor backend_latency_bucket" + value = "${datadog_monitor.backend_latency_bucket.*.id}" } output 
"request_count_id" { description = "id for monitor request_count" - value = "${datadog_monitor.request_count.id}" + value = "${datadog_monitor.request_count.*.id}" } From 70013137cb6388931b2d831ea56e1873a8108f7e Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 30 Aug 2018 15:46:31 +0200 Subject: [PATCH 11/11] MON-227 fix unit for latency --- cloud/gcp/lb/README.md | 8 ++++---- cloud/gcp/lb/inputs.tf | 8 ++++---- cloud/gcp/lb/monitors-lb.tf | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cloud/gcp/lb/README.md b/cloud/gcp/lb/README.md index 05aa2d0..cc707e7 100644 --- a/cloud/gcp/lb/README.md +++ b/cloud/gcp/lb/README.md @@ -29,15 +29,15 @@ Creates DataDog monitors with the following checks: | backend_latency_bucket_extra_tags | Extra tags for GCP LB Backend Latency monitor | list | `` | no | | backend_latency_bucket_message | Custom message for the GCP LB Backend Latency monitor | string | `` | no | | backend_latency_bucket_silenced | Groups to mute for GCP LB Backend Latency monitor | map | `` | no | -| backend_latency_bucket_threshold_critical | Latency in seconds (critical threshold) | string | `8000` | no | -| backend_latency_bucket_threshold_warning | Latency in seconds (warning threshold) | string | `4000` | no | +| backend_latency_bucket_threshold_critical | Latency in milliseconds (critical threshold) | string | `8000` | no | +| backend_latency_bucket_threshold_warning | Latency in milliseconds (warning threshold) | string | `4000` | no | | backend_latency_bucket_time_aggregator | Timeframe for the GCP LB Backend Latency monitor | string | `min` | no | | backend_latency_bucket_timeframe | Timeframe for the GCP LB Backend Latency monitor | string | `last_10m` | no | | backend_latency_service_extra_tags | Extra tags for GCP LB Backend Latency monitor | list | `` | no | | backend_latency_service_message | Custom message for the GCP LB Backend Latency monitor | string | `` | no | | backend_latency_service_silenced | Groups to 
mute for GCP LB Backend Latency monitor | map | `` | no | -| backend_latency_service_threshold_critical | Latency in seconds (critical threshold) | string | `1500` | no | -| backend_latency_service_threshold_warning | Latency in seconds (warning threshold) | string | `1000` | no | +| backend_latency_service_threshold_critical | Latency in milliseconds (critical threshold) | string | `1500` | no | +| backend_latency_service_threshold_warning | Latency in milliseconds (warning threshold) | string | `1000` | no | | backend_latency_service_time_aggregator | Timeframe for the GCP LB Backend Latency monitor | string | `min` | no | | backend_latency_service_timeframe | Timeframe for the GCP LB Backend Latency monitor | string | `last_10m` | no | | environment | Architecture environment | string | - | yes | diff --git a/cloud/gcp/lb/inputs.tf b/cloud/gcp/lb/inputs.tf index 8644ce9..69f3375 100644 --- a/cloud/gcp/lb/inputs.tf +++ b/cloud/gcp/lb/inputs.tf @@ -137,13 +137,13 @@ variable "backend_latency_service_timeframe" { } variable "backend_latency_service_threshold_warning" { - description = "Latency in seconds (warning threshold)" + description = "Latency in milliseconds (warning threshold)" type = "string" default = 1000 } variable "backend_latency_service_threshold_critical" { - description = "Latency in seconds (critical threshold)" + description = "Latency in milliseconds (critical threshold)" type = "string" default = 1500 } @@ -182,13 +182,13 @@ variable "backend_latency_bucket_timeframe" { } variable "backend_latency_bucket_threshold_warning" { - description = "Latency in seconds (warning threshold)" + description = "Latency in milliseconds (warning threshold)" type = "string" default = 4000 } variable "backend_latency_bucket_threshold_critical" { - description = "Latency in seconds (critical threshold)" + description = "Latency in milliseconds (critical threshold)" type = "string" default = 8000 } diff --git a/cloud/gcp/lb/monitors-lb.tf 
b/cloud/gcp/lb/monitors-lb.tf index 141e229..8982222 100644 --- a/cloud/gcp/lb/monitors-lb.tf +++ b/cloud/gcp/lb/monitors-lb.tf @@ -76,7 +76,7 @@ EOF # Backend Latency for service # resource "datadog_monitor" "backend_latency_service" { - name = "[${var.environment}] GCP LB service backend latency {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" + name = "[${var.environment}] GCP LB service backend latency {{#is_alert}}{{{comparator}}} {{threshold}}ms ({{value}}ms){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}ms ({{value}}ms){{/is_warning}}" message = "${coalesce(var.backend_latency_service_message, var.message)}" type = "metric alert" @@ -112,7 +112,7 @@ EOF # Backend Latency for bucket # resource "datadog_monitor" "backend_latency_bucket" { - name = "[${var.environment}] GCP LB bucket backend latency {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" + name = "[${var.environment}] GCP LB bucket backend latency {{#is_alert}}{{{comparator}}} {{threshold}}ms ({{value}}ms){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}ms ({{value}}ms){{/is_warning}}" message = "${coalesce(var.backend_latency_bucket_message, var.message)}" type = "metric alert"