MON-227 Standardize filters and split delays

This commit is contained in:
Rafael Romero Carmona 2018-08-13 16:39:46 +02:00 committed by Quentin Manfroi
parent 9d75a702e3
commit fb93704b17
3 changed files with 26 additions and 48 deletions

View File

@ -33,7 +33,6 @@ Creates DataDog monitors with the following checks:
| backend_latency_threshold_warning | Latency in seconds (warning threshold) | string | `2000` | no |
| backend_latency_time_aggregator | Time aggregator for the GCP LB Backend Latency monitor | string | `min` | no |
| backend_latency_timeframe | Timeframe for the GCP LB Backend Latency monitor | string | `last_10m` | no |
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
| environment | Architecture environment | string | - | yes |
| error_rate_4xx_artificial_request | Number of artificial requests added to the divisor of the GCP LB 4XX Errors monitor query | string | `5` | no |
| error_rate_4xx_extra_tags | Extra tags for GCP LB 4XX Errors monitor | list | `<list>` | no |
@ -49,8 +48,8 @@ Creates DataDog monitors with the following checks:
| error_rate_5xx_threshold_critical | Rate error in percentage (critical threshold) | string | `50` | no |
| error_rate_5xx_time_aggregator | Time aggregator for the GCP LB 5XX Errors monitor | string | `sum` | no |
| error_rate_5xx_timeframe | Timeframe for the GCP LB 5XX Errors monitor | string | `last_5m` | no |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
| filter_tags | Tags used for filtering | string | `*` | no |
| latency_extra_tags | Extra tags for GCP LB Latency monitor | list | `<list>` | no |
| latency_message | Custom message for the GCP LB Latency monitor | string | `` | no |
| latency_silenced | Groups to mute for GCP LB Latency monitor | map | `<map>` | no |
@ -59,7 +58,7 @@ Creates DataDog monitors with the following checks:
| latency_time_aggregator | Time aggregator for the GCP LB Latency monitor | string | `min` | no |
| latency_timeframe | Timeframe for the GCP LB Latency monitor | string | `last_10m` | no |
| message | Message sent when a monitor is triggered | string | - | yes |
| project_id | ID of the GCP Project | string | - | yes |
| new_host_delay | Delay in seconds for the new host evaluation | string | `300` | no |
| request_count_extra_tags | Extra tags for GCP LB Request Count monitor | list | `<list>` | no |
| request_count_message | Custom message for the GCP LB Request Count monitor | string | `` | no |
| request_count_silenced | Groups to mute for GCP LB Request Count monitor | map | `<map>` | no |

View File

@ -6,13 +6,8 @@ variable "environment" {
type = "string"
}
variable "filter_tags_use_defaults" {
description = "Use default filter tags convention"
default = "true"
}
variable "filter_tags_custom" {
description = "Tags used for custom filtering when filter_tags_use_defaults is false"
variable "filter_tags" {
description = "Tags used for filtering"
default = "*"
}
@ -20,17 +15,14 @@ variable "message" {
description = "Message sent when a monitor is triggered"
}
variable "delay" {
variable "evaluation_delay" {
description = "Delay in seconds for the metric evaluation"
default = 900
}
#
# Filter variables
#
variable "project_id" {
type = "string"
description = "ID of the GCP Project"
variable "new_host_delay" {
description = "Delay in seconds for the new host evaluation"
default = 300
}
#

View File

@ -1,16 +1,3 @@
#
# FILTER
#
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ?
format("project_id:%s", var.project_id) :
"${var.filter_tags_custom}"}"
}
}
#
# 4XX Errors
#
@ -22,9 +9,9 @@ resource "datadog_monitor" "error_rate_4xx" {
query = <<EOF
${var.error_rate_4xx_time_aggregator}(${var.error_rate_4xx_timeframe}):
avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered},response_code_class:400} by {backend_target_name}.as_count().fill(zero)
avg:gcp.loadbalancing.https.request_count{${var.filter_tags},response_code_class:400} by {backend_target_name}.as_count().fill(zero)
/
(avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered}} by {backend_target_name}.as_count().fill(zero) + ${var.error_rate_4xx_artificial_request} ) * 100
(avg:gcp.loadbalancing.https.request_count{${var.filter_tags}} by {backend_target_name}.as_count().fill(zero) + ${var.error_rate_4xx_artificial_request} ) * 100
> ${var.error_rate_4xx_threshold_critical}
EOF
@ -40,8 +27,8 @@ EOF
notify_no_data = false
renotify_interval = 0
evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}"
evaluation_delay = "${var.evaluation_delay}"
new_host_delay = "${var.new_host_delay}"
silenced = "${var.error_rate_4xx_silenced}"
@ -66,9 +53,9 @@ resource "datadog_monitor" "error_rate_5xx" {
query = <<EOF
${var.error_rate_5xx_time_aggregator}(${var.error_rate_5xx_timeframe}):
avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered},response_code_class:400} by {backend_target_name}.as_count().fill(zero)
avg:gcp.loadbalancing.https.request_count{${var.filter_tags},response_code_class:500} by {backend_target_name}.as_count().fill(zero)
/
(avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered}} by {backend_target_name}.as_count().fill(zero) + ${var.error_rate_5xx_artificial_request} ) * 100
(avg:gcp.loadbalancing.https.request_count{${var.filter_tags}} by {backend_target_name}.as_count().fill(zero) + ${var.error_rate_5xx_artificial_request} ) * 100
> ${var.error_rate_5xx_threshold_critical}
EOF
@ -84,8 +71,8 @@ EOF
notify_no_data = false
renotify_interval = 0
evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}"
evaluation_delay = "${var.evaluation_delay}"
new_host_delay = "${var.new_host_delay}"
silenced = "${var.error_rate_5xx_silenced}"
@ -110,7 +97,7 @@ resource "datadog_monitor" "latency" {
query = <<EOF
${var.latency_time_aggregator}(${var.latency_timeframe}):
min:gcp.loadbalancing.https.total_latencies.avg{${data.template_file.filter.rendered}} by {backend_target_name}
min:gcp.loadbalancing.https.total_latencies.avg{${var.filter_tags}} by {backend_target_name}
> ${var.latency_threshold_critical}
EOF
@ -127,8 +114,8 @@ EOF
notify_no_data = false
renotify_interval = 0
evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}"
evaluation_delay = "${var.evaluation_delay}"
new_host_delay = "${var.new_host_delay}"
silenced = "${var.latency_silenced}"
@ -153,7 +140,7 @@ resource "datadog_monitor" "backend_latency" {
query = <<EOF
${var.backend_latency_time_aggregator}(${var.backend_latency_timeframe}):
min:gcp.loadbalancing.https.backend_latencies.avg{${data.template_file.filter.rendered}} by {backend_target_name}
min:gcp.loadbalancing.https.backend_latencies.avg{${var.filter_tags}} by {backend_target_name}
> ${var.backend_latency_threshold_critical}
EOF
@ -170,8 +157,8 @@ EOF
notify_no_data = false
renotify_interval = 0
evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}"
evaluation_delay = "${var.evaluation_delay}"
new_host_delay = "${var.new_host_delay}"
silenced = "${var.backend_latency_silenced}"
@ -196,7 +183,7 @@ resource "datadog_monitor" "request_count" {
query = <<EOF
pct_change(${var.request_count_time_aggregator}(${var.request_count_timeframe}),${var.request_count_timeshift}):
avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered}} by {backend_target_name}.as_count().fill(zero)
avg:gcp.loadbalancing.https.request_count{${var.filter_tags}} by {backend_target_name}.as_count().fill(zero)
> ${var.request_count_threshold_critical}
EOF
@ -213,8 +200,8 @@ EOF
notify_no_data = false
renotify_interval = 0
evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}"
evaluation_delay = "${var.evaluation_delay}"
new_host_delay = "${var.new_host_delay}"
silenced = "${var.request_count_silenced}"