From 1c00f748d6e75585d313c7ea334d5849836295e6 Mon Sep 17 00:00:00 2001 From: Alex Lemaresquier Date: Fri, 20 Jul 2018 23:09:38 +0200 Subject: [PATCH 01/12] MON-248-monitors-for-nginx-ingress-contr: add Nginx ingress controller 5xx monitor --- README.md | 3 ++ caas/k8s/ingress/inputs.tf | 63 ++++++++++++++++++++++++++++ caas/k8s/ingress/monitors-ingress.tf | 42 +++++++++++++++++++ 3 files changed, 108 insertions(+) create mode 100644 caas/k8s/ingress/inputs.tf create mode 100644 caas/k8s/ingress/monitors-ingress.tf diff --git a/README.md b/README.md index 7090482..3be9395 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,9 @@ The `//` is very important, it's a terraform specific syntax used to separate gi ### Monitors summary ### +- [caas](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/) + - [k8s](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/k8s/) + - [ingress](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/k8s/ingress/) - [cloud](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/) - [aws](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/) - [alb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/alb/) diff --git a/caas/k8s/ingress/inputs.tf b/caas/k8s/ingress/inputs.tf new file mode 100644 index 0000000..87e229a --- /dev/null +++ b/caas/k8s/ingress/inputs.tf @@ -0,0 +1,63 @@ +# Global Terraform +variable "team" { + type = "string" + default = "k8s" +} + +variable "environment" { + description = "Architecture Environment" + type = "string" +} + +# Global DataDog +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "message" { + description = "Message sent when an alert is triggered" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for 
custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +#Ingress +variable "ingress_5xx_silenced" { + description = "Groups to mute for Ingress 5xx errors monitor" + type = "map" + default = {} +} + +variable "ingress_5xx_message" { + description = "Message sent when an alert is triggered" + default = "" +} + +variable "ingress_5xx_timeframe" { + description = "Monitor timeframe for Ingress 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "ingress_5xx_threshold_critical" { + type = "string" + default = "10" +} + +variable "ingress_5xx_threshold_warning" { + type = "string" + default = "5" +} + +variable "artificial_requests_count" { + default = 5 + description = "Number of false requests used to mitigate false positive in case of low trafic" +} diff --git a/caas/k8s/ingress/monitors-ingress.tf b/caas/k8s/ingress/monitors-ingress.tf new file mode 100644 index 0000000..fb4d64a --- /dev/null +++ b/caas/k8s/ingress/monitors-ingress.tf @@ -0,0 +1,42 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.filter_tags_use_defaults == "true" ? 
format("dd_monitoring:enabled,dd_k8s_ingress:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + } +} + +resource "datadog_monitor" "Nginx_ingress_too_many_5xx" { + name = "[${var.environment}] [${var.team}] Nginx Ingress 5xx errors too high for {{ingress_class.name}} on {{upstream.name}} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.ingress_5xx_message, var.message)}" + + query = < ${var.ingress_5xx_threshold_critical} + EOF + + type = "metric alert" + + thresholds { + warning = "${var.ingress_5xx_threshold_warning}" + critical = "${var.ingress_5xx_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + silenced = "${var.ingress_5xx_silenced}" + + tags = ["env:${var.environment}", "resource:ingress", "team:${var.team}", "provider:k8s"] +} From 40f25abcabfa6cc2dcd7d8c9b981971e01f4fa6b Mon Sep 17 00:00:00 2001 From: Alex Lemaresquier Date: Thu, 30 Aug 2018 16:30:42 +0200 Subject: [PATCH 02/12] MON-248-monitors-for-nginx-ingress-contr: add README.md --- caas/k8s/ingress/README.md | 59 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 caas/k8s/ingress/README.md diff --git a/caas/k8s/ingress/README.md b/caas/k8s/ingress/README.md new file mode 100644 index 0000000..87c2949 --- /dev/null +++ b/caas/k8s/ingress/README.md @@ -0,0 +1,59 @@ +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| artificial_requests_count | Number of false requests used to mitigate false positive in case of low trafic | string | `5` | no | +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| environment | 
Architecture Environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| ingress_5xx_message | Message sent when an alert is triggered | string | `` | no | +| ingress_5xx_silenced | Groups to mute for Ingress 5xx errors monitor | map | `` | no | +| ingress_5xx_threshold_critical | | string | `10` | no | +| ingress_5xx_threshold_warning | | string | `5` | no | +| ingress_5xx_timeframe | Monitor timeframe for Ingress 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| message | Message sent when an alert is triggered | string | - | yes | +| team | Global Terraform | string | `k8s` | no | + +Outputs +------- + +| Name | Description | +|------|-------------| +| Nginx_ingress_too_many_5xx_id | id for monitor Nginx_ingress_too_many_5xx | + +Related documentation +--------------------- + +DataDog blog: https://www.datadoghq.com/blog/monitor-prometheus-metrics +https://github.com/kubernetes/ingress-nginx/pull/423/commits/1d38e3a38425f08de2f75fcae13896a3fec4d144 + +Nginx Ingress Controller setup +------------------------------ +Enable the following flags in the Nginx Ingress Controller chart: +`controller.stats.enabled=true,controller.metrics.enabled=true` +and add the following Datadog agent configuration for each ingress controller: +``` +datadog: + confd: + prometheus.yaml: |- + #nginx_upstream_responses_total{ingress_class,namespace,server,status_code:{1xx,2xx,3xx,4xx,5xx},upstream} + #nginx_upstream_requests_total{ingress_class,namespace,server,upstream} + init_config: + instances: + # The prometheus endpoint to query from + - prometheus_url: http://nginx-ingress-controller-metrics:9913/metrics + # This is NOT the ingress namespace, it is the prefix that will be used for the custom metrics + namespace: nginx-ingress + 
# Filter on the following metrics only + metrics: + - "nginx_upstream_requests_total" + - "nginx_upstream_responses_total" + # Adapt the tags to the current convention and verify that the monitor will match + tags: + - dd_monitoring:enabled + - dd_k8s_ingress:enabled + - dd_ingress_class:nginx + - env:prod +``` From e2a583670d7e40af7b6bd17e7171373225c33e66 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 30 Aug 2018 17:55:25 +0200 Subject: [PATCH 03/12] MON-248 update readme --- caas/k8s/ingress/README.md | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/caas/k8s/ingress/README.md b/caas/k8s/ingress/README.md index 87c2949..09284b6 100644 --- a/caas/k8s/ingress/README.md +++ b/caas/k8s/ingress/README.md @@ -1,5 +1,24 @@ -Inputs ------- +# CAAS K8S INGRESS DataDog monitors + +## How to use this module + +``` +module "datadog-monitors-caas-k8s-ingress" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//caas/k8s/ingress?ref={revision}" + + environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +## Purpose + +Creates DataDog monitors with the following checks: + +- Nginx Ingress 5xx errors too high for {{ingress_class.name}} on {{upstream.name}} + +## Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| @@ -16,8 +35,7 @@ Inputs | message | Message sent when an alert is triggered | string | - | yes | | team | Global Terraform | string | `k8s` | no | -Outputs -------- +## Outputs | Name | Description | |------|-------------| From ab2647ea9cff6b594456458afb731cdd18ef88df Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 30 Aug 2018 17:55:45 +0200 Subject: [PATCH 04/12] MON-248 add outputs --- caas/k8s/ingress/outputs.tf | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 caas/k8s/ingress/outputs.tf diff --git a/caas/k8s/ingress/outputs.tf b/caas/k8s/ingress/outputs.tf 
new file mode 100644 index 0000000..cacbec3 --- /dev/null +++ b/caas/k8s/ingress/outputs.tf @@ -0,0 +1,4 @@ +output "Nginx_ingress_too_many_5xx_id" { + description = "id for monitor Nginx_ingress_too_many_5xx" + value = "${datadog_monitor.Nginx_ingress_too_many_5xx.*.id}" +} From 0766bce61eb4895743c2b8f8011da6f854d5fc52 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 30 Aug 2018 18:03:46 +0200 Subject: [PATCH 05/12] MON-248 use filter tags modules --- caas/k8s/ingress/modules.tf | 19 +++++++++++++++++++ caas/k8s/ingress/monitors-ingress.tf | 4 ++-- 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 caas/k8s/ingress/modules.tf diff --git a/caas/k8s/ingress/modules.tf b/caas/k8s/ingress/modules.tf new file mode 100644 index 0000000..8cf5cd7 --- /dev/null +++ b/caas/k8s/ingress/modules.tf @@ -0,0 +1,19 @@ +module "filter-tags" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "ingress" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" + extra_tags = ["!upstream:upstream-default-backend"] +} + +module "filter-tags-5xx" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "ingress" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" + extra_tags = ["!upstream:upstream-default-backend,status_code:5xx"] +} diff --git a/caas/k8s/ingress/monitors-ingress.tf b/caas/k8s/ingress/monitors-ingress.tf index fb4d64a..9b23ed3 100644 --- a/caas/k8s/ingress/monitors-ingress.tf +++ b/caas/k8s/ingress/monitors-ingress.tf @@ -13,8 +13,8 @@ resource "datadog_monitor" "Nginx_ingress_too_many_5xx" { query = < ${var.ingress_5xx_threshold_critical} EOF From 048e3a8338b949a12d22683799a0b1008d1e613e Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 30 Aug 2018 18:05:31 +0200 Subject: [PATCH 06/12] MON-248 split delay --- caas/k8s/ingress/inputs.tf | 
9 +++++++-- caas/k8s/ingress/monitors-ingress.tf | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/caas/k8s/ingress/inputs.tf b/caas/k8s/ingress/inputs.tf index 87e229a..5d94e11 100644 --- a/caas/k8s/ingress/inputs.tf +++ b/caas/k8s/ingress/inputs.tf @@ -10,9 +10,14 @@ variable "environment" { } # Global DataDog -variable "delay" { +variable "evaluation_delay" { description = "Delay in seconds for the metric evaluation" - default = 900 + default = 15 +} + +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 } variable "message" { diff --git a/caas/k8s/ingress/monitors-ingress.tf b/caas/k8s/ingress/monitors-ingress.tf index 9b23ed3..9327473 100644 --- a/caas/k8s/ingress/monitors-ingress.tf +++ b/caas/k8s/ingress/monitors-ingress.tf @@ -27,14 +27,14 @@ resource "datadog_monitor" "Nginx_ingress_too_many_5xx" { } notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" silenced = "${var.ingress_5xx_silenced}" From cf09393abbedc0236b694fd1c22335c9aa23acd1 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 30 Aug 2018 18:06:14 +0200 Subject: [PATCH 07/12] MON-248 auto update --- caas/k8s/ingress/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/caas/k8s/ingress/README.md b/caas/k8s/ingress/README.md index 09284b6..deac474 100644 --- a/caas/k8s/ingress/README.md +++ b/caas/k8s/ingress/README.md @@ -23,8 +23,8 @@ Creates DataDog monitors with the following checks: | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| | artificial_requests_count | Number of false requests used to mitigate false positive in case of low trafic | string | `5` | no | -| delay | Delay in seconds for the 
metric evaluation | string | `900` | no | | environment | Architecture Environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `15` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | ingress_5xx_message | Message sent when an alert is triggered | string | `` | no | @@ -33,6 +33,7 @@ Creates DataDog monitors with the following checks: | ingress_5xx_threshold_warning | | string | `5` | no | | ingress_5xx_timeframe | Monitor timeframe for Ingress 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered | string | - | yes | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | | team | Global Terraform | string | `k8s` | no | ## Outputs From 0433c7dabcf62f96b533462d77d4c5b77ce14732 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 30 Aug 2018 18:10:34 +0200 Subject: [PATCH 08/12] MON-248 new tag convention and extra tag and disable feature --- caas/k8s/ingress/inputs.tf | 12 ++++++++++++ caas/k8s/ingress/monitors-ingress.tf | 11 ++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/caas/k8s/ingress/inputs.tf b/caas/k8s/ingress/inputs.tf index 5d94e11..b96012e 100644 --- a/caas/k8s/ingress/inputs.tf +++ b/caas/k8s/ingress/inputs.tf @@ -41,6 +41,18 @@ variable "ingress_5xx_silenced" { default = {} } +variable "ingress_5xx_enabled" { + description = "Flag to enable Ingress 5xx errors monitor" + type = "string" + default = "true" +} + +variable "ingress_5xx_extra_tags" { + description = "Extra tags for Ingress 5xx errors monitor" + type = "list" + default = [] +} + variable "ingress_5xx_message" { description = "Message sent when an alert is triggered" default = "" diff --git 
a/caas/k8s/ingress/monitors-ingress.tf b/caas/k8s/ingress/monitors-ingress.tf index 9327473..25c7977 100644 --- a/caas/k8s/ingress/monitors-ingress.tf +++ b/caas/k8s/ingress/monitors-ingress.tf @@ -1,12 +1,5 @@ -data "template_file" "filter" { - template = "$${filter}" - - vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_k8s_ingress:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" - } -} - resource "datadog_monitor" "Nginx_ingress_too_many_5xx" { + count = "${var.ingress_5xx_enabled ? 1 : 0}" name = "[${var.environment}] [${var.team}] Nginx Ingress 5xx errors too high for {{ingress_class.name}} on {{upstream.name}} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.ingress_5xx_message, var.message)}" @@ -38,5 +31,5 @@ resource "datadog_monitor" "Nginx_ingress_too_many_5xx" { silenced = "${var.ingress_5xx_silenced}" - tags = ["env:${var.environment}", "resource:ingress", "team:${var.team}", "provider:k8s"] + tags = ["env:${var.environment}", "type:caas", "provider:prometheus", "resource:nginx-ingress-controller", "team:claranet", "created-by:terraform", "${var.ingress_5xx_extra_tags}"] } From af1cb3f9ffb77adb51914258d83d2733ed8eb56d Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 30 Aug 2018 18:11:20 +0200 Subject: [PATCH 09/12] MON-248 auto update --- caas/k8s/ingress/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/caas/k8s/ingress/README.md b/caas/k8s/ingress/README.md index deac474..819248f 100644 --- a/caas/k8s/ingress/README.md +++ b/caas/k8s/ingress/README.md @@ -27,6 +27,8 @@ Creates DataDog monitors with the following checks: | evaluation_delay | Delay in seconds for the metric evaluation | string | `15` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | 
filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| ingress_5xx_enabled | Flag to enable Ingress 5xx errors monitor | string | `true` | no | +| ingress_5xx_extra_tags | Extra tags for Ingress 5xx errors monitor | list | `` | no | | ingress_5xx_message | Message sent when an alert is triggered | string | `` | no | | ingress_5xx_silenced | Groups to mute for Ingress 5xx errors monitor | map | `` | no | | ingress_5xx_threshold_critical | | string | `10` | no | From c7285c7a0c793f949438a820040f3a6175d2ce84 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 30 Aug 2018 18:12:03 +0200 Subject: [PATCH 10/12] MON-248 require full window to true --- caas/k8s/ingress/monitors-ingress.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/caas/k8s/ingress/monitors-ingress.tf b/caas/k8s/ingress/monitors-ingress.tf index 25c7977..ee472bc 100644 --- a/caas/k8s/ingress/monitors-ingress.tf +++ b/caas/k8s/ingress/monitors-ingress.tf @@ -27,7 +27,7 @@ resource "datadog_monitor" "Nginx_ingress_too_many_5xx" { timeout_h = 0 include_tags = true locked = false - require_full_window = false + require_full_window = true silenced = "${var.ingress_5xx_silenced}" From 881a0fe78e7d5e515251c4b09a901c31eed8a78c Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Fri, 31 Aug 2018 13:07:17 +0200 Subject: [PATCH 11/12] MON-248 rename k8s to kubernetes --- README.md | 4 ++-- caas/{k8s => kubernetes}/ingress/README.md | 11 +++++------ caas/{k8s => kubernetes}/ingress/inputs.tf | 5 ----- caas/{k8s => kubernetes}/ingress/modules.tf | 0 caas/{k8s => kubernetes}/ingress/monitors-ingress.tf | 2 +- caas/{k8s => kubernetes}/ingress/outputs.tf | 0 6 files changed, 8 insertions(+), 14 deletions(-) rename caas/{k8s => kubernetes}/ingress/README.md (91%) rename caas/{k8s => kubernetes}/ingress/inputs.tf (96%) rename caas/{k8s => kubernetes}/ingress/modules.tf (100%) rename caas/{k8s => kubernetes}/ingress/monitors-ingress.tf (81%) rename caas/{k8s => 
kubernetes}/ingress/outputs.tf (100%) diff --git a/README.md b/README.md index 3be9395..4efcd0c 100644 --- a/README.md +++ b/README.md @@ -73,8 +73,8 @@ The `//` is very important, it's a terraform specific syntax used to separate gi ### Monitors summary ### - [caas](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/) - - [k8s](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/k8s/) - - [ingress](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/k8s/ingress/) + - [kubernetes](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/kubernetes/) + - [ingress](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/kubernetes/ingress/) - [cloud](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/) - [aws](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/) - [alb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/alb/) diff --git a/caas/k8s/ingress/README.md b/caas/kubernetes/ingress/README.md similarity index 91% rename from caas/k8s/ingress/README.md rename to caas/kubernetes/ingress/README.md index 819248f..7ba4994 100644 --- a/caas/k8s/ingress/README.md +++ b/caas/kubernetes/ingress/README.md @@ -1,10 +1,10 @@ -# CAAS K8S INGRESS DataDog monitors +# CAAS KUBERNETES INGRESS DataDog monitors ## How to use this module ``` -module "datadog-monitors-caas-k8s-ingress" { - source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//caas/k8s/ingress?ref={revision}" +module "datadog-monitors-caas-kubernetes-ingress" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//caas/kubernetes/ingress?ref={revision}" environment = "${var.environment}" message = "${module.datadog-message-alerting.alerting-message}" @@ -16,7 +16,7 @@ module "datadog-monitors-caas-k8s-ingress" { Creates DataDog monitors with the following checks: -- Nginx Ingress 5xx errors too high for 
{{ingress_class.name}} on {{upstream.name}} +- Nginx Ingress 5xx errors ## Inputs @@ -36,7 +36,6 @@ Creates DataDog monitors with the following checks: | ingress_5xx_timeframe | Monitor timeframe for Ingress 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered | string | - | yes | | new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | -| team | Global Terraform | string | `k8s` | no | ## Outputs @@ -74,7 +73,7 @@ datadog: # Adapt the tags to the current convention and verify that the monitor will match tags: - dd_monitoring:enabled - - dd_k8s_ingress:enabled + - dd_ingress:enabled - dd_ingress_class:nginx - env:prod ``` diff --git a/caas/k8s/ingress/inputs.tf b/caas/kubernetes/ingress/inputs.tf similarity index 96% rename from caas/k8s/ingress/inputs.tf rename to caas/kubernetes/ingress/inputs.tf index b96012e..65ed192 100644 --- a/caas/k8s/ingress/inputs.tf +++ b/caas/kubernetes/ingress/inputs.tf @@ -1,9 +1,4 @@ # Global Terraform -variable "team" { - type = "string" - default = "k8s" -} - variable "environment" { description = "Architecture Environment" type = "string" diff --git a/caas/k8s/ingress/modules.tf b/caas/kubernetes/ingress/modules.tf similarity index 100% rename from caas/k8s/ingress/modules.tf rename to caas/kubernetes/ingress/modules.tf diff --git a/caas/k8s/ingress/monitors-ingress.tf b/caas/kubernetes/ingress/monitors-ingress.tf similarity index 81% rename from caas/k8s/ingress/monitors-ingress.tf rename to caas/kubernetes/ingress/monitors-ingress.tf index ee472bc..2cce3c3 100644 --- a/caas/k8s/ingress/monitors-ingress.tf +++ b/caas/kubernetes/ingress/monitors-ingress.tf @@ -1,6 +1,6 @@ resource "datadog_monitor" "Nginx_ingress_too_many_5xx" { count = "${var.ingress_5xx_enabled ? 
1 : 0}" - name = "[${var.environment}] [${var.team}] Nginx Ingress 5xx errors too high for {{ingress_class.name}} on {{upstream.name}} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Nginx Ingress 5xx errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.ingress_5xx_message, var.message)}" query = < Date: Fri, 31 Aug 2018 13:25:34 +0200 Subject: [PATCH 12/12] MON-248 add 4xx monitor --- caas/kubernetes/ingress/README.md | 15 +++++-- caas/kubernetes/ingress/inputs.tf | 43 ++++++++++++++++++++- caas/kubernetes/ingress/modules.tf | 10 +++++ caas/kubernetes/ingress/monitors-ingress.tf | 38 +++++++++++++++++- caas/kubernetes/ingress/outputs.tf | 11 ++++-- 5 files changed, 108 insertions(+), 9 deletions(-) diff --git a/caas/kubernetes/ingress/README.md b/caas/kubernetes/ingress/README.md index 7ba4994..0a7ba2a 100644 --- a/caas/kubernetes/ingress/README.md +++ b/caas/kubernetes/ingress/README.md @@ -16,6 +16,7 @@ module "datadog-monitors-caas-kubernetes-ingress" { Creates DataDog monitors with the following checks: +- Nginx Ingress 4xx errors - Nginx Ingress 5xx errors ## Inputs @@ -27,12 +28,19 @@ Creates DataDog monitors with the following checks: | evaluation_delay | Delay in seconds for the metric evaluation | string | `15` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| ingress_4xx_enabled | Flag to enable Ingress 4xx errors monitor | string | `true` | no | +| ingress_4xx_extra_tags | Extra tags for Ingress 4xx errors monitor | list | `` | no | +| ingress_4xx_message | Message sent when an alert is triggered | string | `` | no | +| 
ingress_4xx_silenced | Groups to mute for Ingress 4xx errors monitor | map | `` | no | +| ingress_4xx_threshold_critical | | string | `40` | no | +| ingress_4xx_threshold_warning | | string | `20` | no | +| ingress_4xx_timeframe | Monitor timeframe for Ingress 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | ingress_5xx_enabled | Flag to enable Ingress 5xx errors monitor | string | `true` | no | | ingress_5xx_extra_tags | Extra tags for Ingress 5xx errors monitor | list | `` | no | | ingress_5xx_message | Message sent when an alert is triggered | string | `` | no | | ingress_5xx_silenced | Groups to mute for Ingress 5xx errors monitor | map | `` | no | -| ingress_5xx_threshold_critical | | string | `10` | no | -| ingress_5xx_threshold_warning | | string | `5` | no | +| ingress_5xx_threshold_critical | | string | `20` | no | +| ingress_5xx_threshold_warning | | string | `10` | no | | ingress_5xx_timeframe | Monitor timeframe for Ingress 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered | string | - | yes | | new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | @@ -41,7 +49,8 @@ Creates DataDog monitors with the following checks: | Name | Description | |------|-------------| -| Nginx_ingress_too_many_5xx_id | id for monitor Nginx_ingress_too_many_5xx | +| nginx_ingress_too_many_4xx_id | id for monitor nginx_ingress_too_many_4xx | +| nginx_ingress_too_many_5xx_id | id for monitor nginx_ingress_too_many_5xx | Related documentation --------------------- diff --git a/caas/kubernetes/ingress/inputs.tf b/caas/kubernetes/ingress/inputs.tf index 65ed192..7be1590 100644 --- a/caas/kubernetes/ingress/inputs.tf +++ b/caas/kubernetes/ingress/inputs.tf @@ -61,12 +61,51 @@ variable "ingress_5xx_timeframe" { variable 
"ingress_5xx_threshold_critical" { type = "string" - default = "10" + default = "20" } variable "ingress_5xx_threshold_warning" { type = "string" - default = "5" + default = "10" +} + +variable "ingress_4xx_silenced" { + description = "Groups to mute for Ingress 4xx errors monitor" + type = "map" + default = {} +} + +variable "ingress_4xx_enabled" { + description = "Flag to enable Ingress 4xx errors monitor" + type = "string" + default = "true" +} + +variable "ingress_4xx_extra_tags" { + description = "Extra tags for Ingress 4xx errors monitor" + type = "list" + default = [] +} + +variable "ingress_4xx_message" { + description = "Message sent when an alert is triggered" + default = "" +} + +variable "ingress_4xx_timeframe" { + description = "Monitor timeframe for Ingress 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "ingress_4xx_threshold_critical" { + type = "string" + default = "40" +} + +variable "ingress_4xx_threshold_warning" { + type = "string" + default = "20" } variable "artificial_requests_count" { diff --git a/caas/kubernetes/ingress/modules.tf b/caas/kubernetes/ingress/modules.tf index 8cf5cd7..ee1e148 100644 --- a/caas/kubernetes/ingress/modules.tf +++ b/caas/kubernetes/ingress/modules.tf @@ -17,3 +17,13 @@ module "filter-tags-5xx" { filter_tags_custom = "${var.filter_tags_custom}" extra_tags = ["!upstream:upstream-default-backend,status_code:5xx"] } + +module "filter-tags-4xx" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "ingress" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" + extra_tags = ["!upstream:upstream-default-backend,status_code:4xx"] +} diff --git a/caas/kubernetes/ingress/monitors-ingress.tf b/caas/kubernetes/ingress/monitors-ingress.tf index 2cce3c3..ab44196 100644 --- a/caas/kubernetes/ingress/monitors-ingress.tf 
+++ b/caas/kubernetes/ingress/monitors-ingress.tf @@ -1,4 +1,4 @@ -resource "datadog_monitor" "Nginx_ingress_too_many_5xx" { +resource "datadog_monitor" "nginx_ingress_too_many_5xx" { count = "${var.ingress_5xx_enabled ? 1 : 0}" name = "[${var.environment}] Nginx Ingress 5xx errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.ingress_5xx_message, var.message)}" @@ -33,3 +33,39 @@ resource "datadog_monitor" "Nginx_ingress_too_many_5xx" { tags = ["env:${var.environment}", "type:caas", "provider:prometheus", "resource:nginx-ingress-controller", "team:claranet", "created-by:terraform", "${var.ingress_5xx_extra_tags}"] } + +resource "datadog_monitor" "nginx_ingress_too_many_4xx" { + count = "${var.ingress_4xx_enabled ? 1 : 0}" + name = "[${var.environment}] Nginx Ingress 4xx errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.ingress_4xx_message, var.message)}" + + query = < ${var.ingress_4xx_threshold_critical} + EOF + + type = "metric alert" + + thresholds { + warning = "${var.ingress_4xx_threshold_warning}" + critical = "${var.ingress_4xx_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + + silenced = "${var.ingress_4xx_silenced}" + + tags = ["env:${var.environment}", "type:caas", "provider:prometheus", "resource:nginx-ingress-controller", "team:claranet", "created-by:terraform", "${var.ingress_4xx_extra_tags}"] +} diff --git a/caas/kubernetes/ingress/outputs.tf b/caas/kubernetes/ingress/outputs.tf index cacbec3..28582f8 100644 --- a/caas/kubernetes/ingress/outputs.tf +++ 
b/caas/kubernetes/ingress/outputs.tf @@ -1,4 +1,9 @@ -output "Nginx_ingress_too_many_5xx_id" { - description = "id for monitor Nginx_ingress_too_many_5xx" - value = "${datadog_monitor.Nginx_ingress_too_many_5xx.*.id}" +output "nginx_ingress_too_many_5xx_id" { + description = "id for monitor nginx_ingress_too_many_5xx" + value = "${datadog_monitor.nginx_ingress_too_many_5xx.*.id}" +} + +output "nginx_ingress_too_many_4xx_id" { + description = "id for monitor nginx_ingress_too_many_4xx" + value = "${datadog_monitor.nginx_ingress_too_many_4xx.*.id}" }