From 922f66e2a27b57522917f7c6982c41b3a36cc0e0 Mon Sep 17 00:00:00 2001 From: jnancel Date: Thu, 27 Sep 2018 16:41:06 +0200 Subject: [PATCH 01/15] MON-316 Add monitors for serverfarms --- README.md | 1 + cloud/azure/serverfarms/inputs.tf | 140 ++++++++++++++++++ .../serverfarms/monitor-azure-serverfarms.tf | 83 +++++++++++ cloud/azure/serverfarms/outputs.tf | 0 4 files changed, 224 insertions(+) create mode 100644 cloud/azure/serverfarms/inputs.tf create mode 100644 cloud/azure/serverfarms/monitor-azure-serverfarms.tf create mode 100644 cloud/azure/serverfarms/outputs.tf diff --git a/README.md b/README.md index 5cadf39..c54a411 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,7 @@ The `//` is very important, it's a terraform specific syntax used to separate gi - [iothubs](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/iothubs/) - [keyvault](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/keyvault/) - [redis](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/redis/) + - [serverfarms](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/serverfarms/) - [servicebus](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/servicebus/) - [sql-database](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/sql-database/) - [storage](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/storage/) diff --git a/cloud/azure/serverfarms/inputs.tf b/cloud/azure/serverfarms/inputs.tf new file mode 100644 index 0000000..e620b7d --- /dev/null +++ b/cloud/azure/serverfarms/inputs.tf @@ -0,0 +1,140 @@ +# Global Terraform +variable "environment" { + description = "Architecture environment" + type = "string" +} + +# Global DataDog +variable "message" { + description = "Message sent when a Redis monitor is triggered" +} + +variable "evaluation_delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +# Azure serverfarms specific variables + +# Status +variable "status_enabled" { + description = "Flag to enable the serverfarms status monitor" + type = "string" + default = "true" +} + +variable "status_message" { + description = "Custom message for serverfarm status monitor" + type = "string" +} + +variable "status_silenced" { + type = "map" + description = "Groups to mute for serverfarm status monitor" + default = {} +} + +variable "status_extra_tags" { + description = "Extra tags for Redis status monitor" + type = "list" + default = [] +} + +variable "status_time_aggregator" { + description = "Monitor aggregator for serverfarms status [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "status_timeframe" { + description = "Monitor timeframe for serverfarms status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +# CPU percentage +variable "cpu_percentage_enabled" { + description = "Flag to enable the serverfarms cpu_percentage monitor" + type = "string" + default = "true" +} + +variable "cpu_percentage_message" { + description = "Custom message for serverfarm cpu_percentage monitor" + type = "string" +} + +variable "cpu_percentage_silenced" { + type = "map" + description = "Groups to mute for serverfarm cpu_percentage monitor" + default = {} +} + +variable "cpu_percentage_extra_tags" { + description = "Extra tags for Redis cpu_percentage monitor" + type = "list" + default = [] +} + +variable "cpu_percentage_time_aggregator" { + description = "Monitor aggregator for serverfarms cpu_percentage [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "cpu_percentage_timeframe" { + description = "Monitor timeframe for serverfarms cpu_percentage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +# Memory percentage +variable "memory_percentage_enabled" { + description = "Flag to enable the serverfarms memory_percentage monitor" + type = "string" + default = "true" +} + +variable "memory_percentage_message" { + description = "Custom message for serverfarm memory_percentage monitor" + type = "string" +} + +variable "memory_percentage_silenced" { + type = "map" + description = "Groups to mute for serverfarm memory_percentage monitor" + default = {} +} + +variable "memory_percentage_extra_tags" { + description = "Extra tags for Redis memory_percentage monitor" + type = "list" + default = [] +} + +variable "memory_percentage_time_aggregator" { + description = "Monitor aggregator for serverfarms memory_percentage [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "memory_percentage_timeframe" { + description = "Monitor timeframe for serverfarms memory_percentage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} diff --git a/cloud/azure/serverfarms/monitor-azure-serverfarms.tf b/cloud/azure/serverfarms/monitor-azure-serverfarms.tf new file mode 100644 index 0000000..19663bf --- /dev/null +++ b/cloud/azure/serverfarms/monitor-azure-serverfarms.tf @@ -0,0 +1,83 @@ +resource "datadog_monitor" "status" { + count = "${var.status_enabled ? 1 : 0}" + name = "[${var.environment} Serverfarm {{name}} is down]" + message = "${coalesce(var.status_message, var.message)}" + + query = < Date: Thu, 27 Sep 2018 16:49:03 +0200 Subject: [PATCH 02/15] MON-316 Add modules.tf for filter-tags --- cloud/azure/serverfarms/modules.tf | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 cloud/azure/serverfarms/modules.tf diff --git a/cloud/azure/serverfarms/modules.tf b/cloud/azure/serverfarms/modules.tf new file mode 100644 index 0000000..3817b54 --- /dev/null +++ b/cloud/azure/serverfarms/modules.tf @@ -0,0 +1,8 @@ +module "filter-tags" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "redis" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} From 86aba7c83856c42d9b8a518a9c2d09f374db6dc4 Mon Sep 17 00:00:00 2001 From: jnancel Date: Thu, 27 Sep 2018 17:10:13 +0200 Subject: [PATCH 03/15] MON-316 Update file name and correct typo --- .../serverfarms/monitors-azure-serverfarms.tf | 83 +++++++++++++++++++ cloud/azure/serverfarms/outputs.tf | 15 ++++ 2 files changed, 98 insertions(+) create mode 100644 cloud/azure/serverfarms/monitors-azure-serverfarms.tf diff --git a/cloud/azure/serverfarms/monitors-azure-serverfarms.tf b/cloud/azure/serverfarms/monitors-azure-serverfarms.tf new file mode 100644 index 0000000..54e9e89 --- /dev/null +++ b/cloud/azure/serverfarms/monitors-azure-serverfarms.tf @@ -0,0 +1,83 @@ +resource "datadog_monitor" "status" { + count = "${var.status_enabled ? 1 : 0}" + name = "[${var.environment}] Serverfarm {{name}} is down" + message = "${coalesce(var.status_message, var.message)}" + + query = < Date: Thu, 27 Sep 2018 17:26:41 +0200 Subject: [PATCH 04/15] MON-316 auto update --- cloud/azure/serverfarms/README.md | 61 ++++++++++++++ cloud/azure/serverfarms/modules.tf | 2 +- .../serverfarms/monitor-azure-serverfarms.tf | 83 ------------------- cloud/azure/serverfarms/outputs.tf | 1 - 4 files changed, 62 insertions(+), 85 deletions(-) create mode 100644 cloud/azure/serverfarms/README.md delete mode 100644 cloud/azure/serverfarms/monitor-azure-serverfarms.tf diff --git a/cloud/azure/serverfarms/README.md b/cloud/azure/serverfarms/README.md new file mode 100644 index 0000000..3c3b6c8 --- /dev/null +++ b/cloud/azure/serverfarms/README.md @@ -0,0 +1,61 @@ +# CLOUD AZURE SERVERFARMS DataDog monitors + +## How to use this module + +``` +module "datadog-monitors-cloud-azure-serverfarms" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/serverfarms?ref={revision}" + + environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +## Purpose + +Creates DataDog monitors with the following checks: + +- Serverfarm {{name}} CPU percentage is too high +- Serverfarm {{name}} CPU percentage is too high +- Serverfarm {{name}} is down + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| cpu_percentage_enabled | Flag to enable the serverfarms cpu_percentage monitor | string | `true` | no | +| cpu_percentage_extra_tags | Extra tags for Redis cpu_percentage monitor | list | `[]` | no | +| cpu_percentage_message | Custom message for serverfarm cpu_percentage monitor | string | - | yes | +| cpu_percentage_silenced | Groups to mute for serverfarm cpu_percentage monitor | map | `{}` | no | +| cpu_percentage_time_aggregator | Monitor aggregator for serverfarms cpu_percentage [available values: min, max or avg] | string | `max` | no | +| cpu_percentage_timeframe | Monitor timeframe for serverfarms cpu_percentage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| environment | Architecture environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| memory_percentage_enabled | Flag to enable the serverfarms memory_percentage monitor | string | `true` | no | +| memory_percentage_extra_tags | Extra tags for Redis memory_percentage monitor | list | `[]` | no | +| memory_percentage_message | Custom message for serverfarm memory_percentage monitor | string | - | yes | +| memory_percentage_silenced | Groups to mute for serverfarm memory_percentage monitor | map | `{}` | no | +| memory_percentage_time_aggregator | Monitor aggregator for serverfarms memory_percentage [available values: min, max or avg] | string | `max` | no | +| memory_percentage_timeframe | Monitor timeframe for serverfarms memory_percentage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| message | Message sent when a Redis monitor is triggered | string | - | yes | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | +| status_enabled | Flag to enable the serverfarms status monitor | string | `true` | no | +| status_extra_tags | Extra tags for Redis status monitor | list | `[]` | no | +| status_message | Custom message for serverfarm status monitor | string | - | yes | +| status_silenced | Groups to mute for serverfarm status monitor | map | `{}` | no | +| status_time_aggregator | Monitor aggregator for serverfarms status [available values: min, max or avg] | string | `max` | no | +| status_timeframe | Monitor timeframe for serverfarms status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| cpu_percentage_id | id for monitor cpu_percentage | +| memory_percentage_id | id for monitor memory_percentage | +| status_id | id for monitor status | + +## Related documentation + diff --git a/cloud/azure/serverfarms/modules.tf b/cloud/azure/serverfarms/modules.tf index 3817b54..d09e425 100644 --- a/cloud/azure/serverfarms/modules.tf +++ b/cloud/azure/serverfarms/modules.tf @@ -2,7 +2,7 @@ module "filter-tags" { source = "../../../common/filter-tags" environment = "${var.environment}" - resource = "redis" + resource = "serverfarms" filter_tags_use_defaults = "${var.filter_tags_use_defaults}" filter_tags_custom = "${var.filter_tags_custom}" } diff --git a/cloud/azure/serverfarms/monitor-azure-serverfarms.tf b/cloud/azure/serverfarms/monitor-azure-serverfarms.tf deleted file mode 100644 index 19663bf..0000000 --- a/cloud/azure/serverfarms/monitor-azure-serverfarms.tf +++ /dev/null @@ -1,83 +0,0 @@ -resource "datadog_monitor" "status" { - count = "${var.status_enabled ? 1 : 0}" - name = "[${var.environment} Serverfarm {{name}} is down]" - message = "${coalesce(var.status_message, var.message)}" - - query = < Date: Fri, 28 Sep 2018 10:04:00 +0200 Subject: [PATCH 05/15] MON-316 Add empty default for status_message --- cloud/azure/serverfarms/inputs.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/cloud/azure/serverfarms/inputs.tf b/cloud/azure/serverfarms/inputs.tf index e620b7d..7e4522c 100644 --- a/cloud/azure/serverfarms/inputs.tf +++ b/cloud/azure/serverfarms/inputs.tf @@ -41,6 +41,7 @@ variable "status_enabled" { variable "status_message" { description = "Custom message for serverfarm status monitor" type = "string" + default = "" } variable "status_silenced" { From 6f1e9a224628ec6098fbf11c87545aae47543d14 Mon Sep 17 00:00:00 2001 From: jnancel Date: Fri, 28 Sep 2018 10:13:04 +0200 Subject: [PATCH 06/15] MON-316 Add empty default message for all messages --- cloud/azure/serverfarms/inputs.tf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cloud/azure/serverfarms/inputs.tf b/cloud/azure/serverfarms/inputs.tf index 7e4522c..f2cd6ce 100644 --- a/cloud/azure/serverfarms/inputs.tf +++ b/cloud/azure/serverfarms/inputs.tf @@ -78,6 +78,7 @@ variable "cpu_percentage_enabled" { variable "cpu_percentage_message" { description = "Custom message for serverfarm cpu_percentage monitor" type = "string" + default = "" } variable "cpu_percentage_silenced" { @@ -114,6 +115,7 @@ variable "memory_percentage_enabled" { variable "memory_percentage_message" { description = "Custom message for serverfarm memory_percentage monitor" type = "string" + default = "" } variable "memory_percentage_silenced" { From 582b6a92d80f72962f61e7569c7e59987324ce26 Mon Sep 17 00:00:00 2001 From: jnancel Date: Fri, 28 Sep 2018 10:37:03 +0200 Subject: [PATCH 07/15] MON-316 Add threshold warning/critical for memory and cpu percentage --- cloud/azure/serverfarms/inputs.tf | 20 +++++++++++++++++++ .../serverfarms/monitors-azure-serverfarms.tf | 14 +++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/cloud/azure/serverfarms/inputs.tf b/cloud/azure/serverfarms/inputs.tf index f2cd6ce..3f88290 100644 --- a/cloud/azure/serverfarms/inputs.tf +++ b/cloud/azure/serverfarms/inputs.tf @@ -105,6 +105,16 @@ variable "cpu_percentage_timeframe" { default = "last_5m" } +variable "cpu_percentage_threshold_critical" { + description = "CPU percentage (critical threshold)" + default = 80 +} + +variable "cpu_percentage_threshold_warning" { + description = "CPU percentage (warning threshold)" + default = 60 +} + # Memory percentage variable "memory_percentage_enabled" { description = "Flag to enable the serverfarms memory_percentage monitor" @@ -141,3 +151,13 @@ variable "memory_percentage_timeframe" { type = "string" default = "last_5m" } + +variable "memory_percentage_threshold_critical" { + description = "Memory percentage (critical threshold)" + default = 80 +} + +variable "memory_percentage_threshold_warning" { + description = "Memory percentage (warning threshold)" + default = 60 +} diff --git a/cloud/azure/serverfarms/monitors-azure-serverfarms.tf b/cloud/azure/serverfarms/monitors-azure-serverfarms.tf index 54e9e89..f593c3c 100644 --- a/cloud/azure/serverfarms/monitors-azure-serverfarms.tf +++ b/cloud/azure/serverfarms/monitors-azure-serverfarms.tf @@ -34,11 +34,16 @@ resource "datadog_monitor" "cpu_percentage" { query = < ${var.cpu_percentage_threshold_critical} EOF type = "metric alert" + thresholds { + warning = "${var.cpu_percentage_threshold_warning}" + critical = "${var.cpu_percentage_threshold_critical}" + } + silenced = "${var.cpu_percentage_silenced}" notify_no_data = true @@ -62,11 +67,16 @@ resource "datadog_monitor" "memory_percentage" { query = < ${var.memory_percentage_threshold_critical} EOF type = "metric alert" + thresholds { + warning = "${var.memory_percentage_threshold_warning}" + critical = "${var.memory_percentage_threshold_critical}" + } + silenced = "${var.memory_percentage_silenced}" notify_no_data = true From 062099eb3124540264a53c924a2252db38a2c31e Mon Sep 17 00:00:00 2001 From: jnancel Date: Fri, 28 Sep 2018 11:02:39 +0200 Subject: [PATCH 08/15] MON-316 Update readme with auto-update --- cloud/azure/serverfarms/README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cloud/azure/serverfarms/README.md b/cloud/azure/serverfarms/README.md index 3c3b6c8..a791f34 100644 --- a/cloud/azure/serverfarms/README.md +++ b/cloud/azure/serverfarms/README.md @@ -26,8 +26,10 @@ Creates DataDog monitors with the following checks: |------|-------------|:----:|:-----:|:-----:| | cpu_percentage_enabled | Flag to enable the serverfarms cpu_percentage monitor | string | `true` | no | | cpu_percentage_extra_tags | Extra tags for Redis cpu_percentage monitor | list | `[]` | no | -| cpu_percentage_message | Custom message for serverfarm cpu_percentage monitor | string | - | yes | +| cpu_percentage_message | Custom message for serverfarm cpu_percentage monitor | string | `` | no | | cpu_percentage_silenced | Groups to mute for serverfarm cpu_percentage monitor | map | `{}` | no | +| cpu_percentage_threshold_critical | CPU percentage (critical threshold) | string | `80` | no | +| cpu_percentage_threshold_warning | CPU percentage (warning threshold) | string | `60` | no | | cpu_percentage_time_aggregator | Monitor aggregator for serverfarms cpu_percentage [available values: min, max or avg] | string | `max` | no | | cpu_percentage_timeframe | Monitor timeframe for serverfarms cpu_percentage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | environment | Architecture environment | string | - | yes | @@ -36,15 +38,17 @@ Creates DataDog monitors with the following checks: | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | memory_percentage_enabled | Flag to enable the serverfarms memory_percentage monitor | string | `true` | no | | memory_percentage_extra_tags | Extra tags for Redis memory_percentage monitor | list | `[]` | no | -| memory_percentage_message | Custom message for serverfarm memory_percentage monitor | string | - | yes | +| memory_percentage_message | Custom message for serverfarm memory_percentage monitor | string | `` | no | | memory_percentage_silenced | Groups to mute for serverfarm memory_percentage monitor | map | `{}` | no | +| memory_percentage_threshold_critical | Memory percentage (critical threshold) | string | `80` | no | +| memory_percentage_threshold_warning | Memory percentage (warning threshold) | string | `60` | no | | memory_percentage_time_aggregator | Monitor aggregator for serverfarms memory_percentage [available values: min, max or avg] | string | `max` | no | | memory_percentage_timeframe | Monitor timeframe for serverfarms memory_percentage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | | new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | | status_enabled | Flag to enable the serverfarms status monitor | string | `true` | no | | status_extra_tags | Extra tags for Redis status monitor | list | `[]` | no | -| status_message | Custom message for serverfarm status monitor | string | - | yes | +| status_message | Custom message for serverfarm status monitor | string | `` | no | | status_silenced | Groups to mute for serverfarm status monitor | map | `{}` | no | | status_time_aggregator | Monitor aggregator for serverfarms status [available values: min, max or avg] | string | `max` | no | | status_timeframe | Monitor timeframe for serverfarms status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | From 0b3298e487c9b096a531c6134f4940461cf9eed9 Mon Sep 17 00:00:00 2001 From: jnancel Date: Fri, 28 Sep 2018 11:40:19 +0200 Subject: [PATCH 09/15] MON-316 Delete {{name}} since already present elsewhere in the message --- cloud/azure/serverfarms/monitors-azure-serverfarms.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cloud/azure/serverfarms/monitors-azure-serverfarms.tf b/cloud/azure/serverfarms/monitors-azure-serverfarms.tf index f593c3c..881b189 100644 --- a/cloud/azure/serverfarms/monitors-azure-serverfarms.tf +++ b/cloud/azure/serverfarms/monitors-azure-serverfarms.tf @@ -1,6 +1,6 @@ resource "datadog_monitor" "status" { count = "${var.status_enabled ? 1 : 0}" - name = "[${var.environment}] Serverfarm {{name}} is down" + name = "[${var.environment}] Serverfarm is down" message = "${coalesce(var.status_message, var.message)}" query = < Date: Fri, 28 Sep 2018 11:43:55 +0200 Subject: [PATCH 10/15] MON-316 Change notify_no_data to false on cpu an memory --- cloud/azure/serverfarms/monitors-azure-serverfarms.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloud/azure/serverfarms/monitors-azure-serverfarms.tf b/cloud/azure/serverfarms/monitors-azure-serverfarms.tf index 881b189..54ce06a 100644 --- a/cloud/azure/serverfarms/monitors-azure-serverfarms.tf +++ b/cloud/azure/serverfarms/monitors-azure-serverfarms.tf @@ -46,7 +46,7 @@ resource "datadog_monitor" "cpu_percentage" { silenced = "${var.cpu_percentage_silenced}" - notify_no_data = true + notify_no_data = false evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false @@ -79,7 +79,7 @@ resource "datadog_monitor" "memory_percentage" { silenced = "${var.memory_percentage_silenced}" - notify_no_data = true + notify_no_data = false evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false From d04f6d03bd9cf79dd206744cff69b43812b540a3 Mon Sep 17 00:00:00 2001 From: jnancel Date: Fri, 28 Sep 2018 11:46:23 +0200 Subject: [PATCH 11/15] MON-216 Apply auto-update --- cloud/azure/serverfarms/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cloud/azure/serverfarms/README.md b/cloud/azure/serverfarms/README.md index a791f34..34094eb 100644 --- a/cloud/azure/serverfarms/README.md +++ b/cloud/azure/serverfarms/README.md @@ -16,9 +16,9 @@ module "datadog-monitors-cloud-azure-serverfarms" { Creates DataDog monitors with the following checks: -- Serverfarm {{name}} CPU percentage is too high -- Serverfarm {{name}} CPU percentage is too high -- Serverfarm {{name}} is down +- Serverfarm CPU percentage is too high +- Serverfarm is down +- Serverfarm memory percentage is too high ## Inputs From c16dd3676dd27ef016803b3ae02965ce3c4cb2f9 Mon Sep 17 00:00:00 2001 From: jnancel Date: Mon, 1 Oct 2018 13:55:04 +0200 Subject: [PATCH 12/15] MON-316 Change value according to omni zabbix templates --- cloud/azure/serverfarms/inputs.tf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cloud/azure/serverfarms/inputs.tf b/cloud/azure/serverfarms/inputs.tf index 3f88290..f096b72 100644 --- a/cloud/azure/serverfarms/inputs.tf +++ b/cloud/azure/serverfarms/inputs.tf @@ -102,17 +102,17 @@ variable "cpu_percentage_time_aggregator" { variable "cpu_percentage_timeframe" { description = "Monitor timeframe for serverfarms cpu_percentage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" - default = "last_5m" + default = "last_10m" } variable "cpu_percentage_threshold_critical" { description = "CPU percentage (critical threshold)" - default = 80 + default = 95 } variable "cpu_percentage_threshold_warning" { description = "CPU percentage (warning threshold)" - default = 60 + default = 90 } # Memory percentage @@ -154,10 +154,10 @@ variable "memory_percentage_timeframe" { variable "memory_percentage_threshold_critical" { description = "Memory percentage (critical threshold)" - default = 80 + default = 95 } variable "memory_percentage_threshold_warning" { description = "Memory percentage (warning threshold)" - default = 60 + default = 90 } From 539911d7068073b2ed420a268da94434797fff84 Mon Sep 17 00:00:00 2001 From: jnancel Date: Mon, 1 Oct 2018 16:18:56 +0200 Subject: [PATCH 13/15] MON-316 I forgot ( again ) the f****** auto-update :p --- cloud/azure/serverfarms/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cloud/azure/serverfarms/README.md b/cloud/azure/serverfarms/README.md index 34094eb..4cf7dc7 100644 --- a/cloud/azure/serverfarms/README.md +++ b/cloud/azure/serverfarms/README.md @@ -28,10 +28,10 @@ Creates DataDog monitors with the following checks: | cpu_percentage_extra_tags | Extra tags for Redis cpu_percentage monitor | list | `[]` | no | | cpu_percentage_message | Custom message for serverfarm cpu_percentage monitor | string | `` | no | | cpu_percentage_silenced | Groups to mute for serverfarm cpu_percentage monitor | map | `{}` | no | -| cpu_percentage_threshold_critical | CPU percentage (critical threshold) | string | `80` | no | -| cpu_percentage_threshold_warning | CPU percentage (warning threshold) | string | `60` | no | +| cpu_percentage_threshold_critical | CPU percentage (critical threshold) | string | `95` | no | +| cpu_percentage_threshold_warning | CPU percentage (warning threshold) | string | `90` | no | | cpu_percentage_time_aggregator | Monitor aggregator for serverfarms cpu_percentage [available values: min, max or avg] | string | `max` | no | -| cpu_percentage_timeframe | Monitor timeframe for serverfarms cpu_percentage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| cpu_percentage_timeframe | Monitor timeframe for serverfarms cpu_percentage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_10m` | no | | environment | Architecture environment | string | - | yes | | evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | @@ -40,8 +40,8 @@ Creates DataDog monitors with the following checks: | memory_percentage_extra_tags | Extra tags for Redis memory_percentage monitor | list | `[]` | no | | memory_percentage_message | Custom message for serverfarm memory_percentage monitor | string | `` | no | | memory_percentage_silenced | Groups to mute for serverfarm memory_percentage monitor | map | `{}` | no | -| memory_percentage_threshold_critical | Memory percentage (critical threshold) | string | `80` | no | -| memory_percentage_threshold_warning | Memory percentage (warning threshold) | string | `60` | no | +| memory_percentage_threshold_critical | Memory percentage (critical threshold) | string | `95` | no | +| memory_percentage_threshold_warning | Memory percentage (warning threshold) | string | `90` | no | | memory_percentage_time_aggregator | Monitor aggregator for serverfarms memory_percentage [available values: min, max or avg] | string | `max` | no | | memory_percentage_timeframe | Monitor timeframe for serverfarms memory_percentage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | From 441927d0b3be1957dece077845dd753f0a643d75 Mon Sep 17 00:00:00 2001 From: jnancel Date: Tue, 2 Oct 2018 11:53:43 +0200 Subject: [PATCH 14/15] MON-316 Correct some descriptions --- cloud/azure/serverfarms/README.md | 8 ++++---- cloud/azure/serverfarms/inputs.tf | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cloud/azure/serverfarms/README.md b/cloud/azure/serverfarms/README.md index 4cf7dc7..8b1c02b 100644 --- a/cloud/azure/serverfarms/README.md +++ b/cloud/azure/serverfarms/README.md @@ -25,7 +25,7 @@ Creates DataDog monitors with the following checks: | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| | cpu_percentage_enabled | Flag to enable the serverfarms cpu_percentage monitor | string | `true` | no | -| cpu_percentage_extra_tags | Extra tags for Redis cpu_percentage monitor | list | `[]` | no | +| cpu_percentage_extra_tags | Extra tags for serverfarms cpu_percentage monitor | list | `[]` | no | | cpu_percentage_message | Custom message for serverfarm cpu_percentage monitor | string | `` | no | | cpu_percentage_silenced | Groups to mute for serverfarm cpu_percentage monitor | map | `{}` | no | | cpu_percentage_threshold_critical | CPU percentage (critical threshold) | string | `95` | no | @@ -37,17 +37,17 @@ Creates DataDog monitors with the following checks: | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | memory_percentage_enabled | Flag to enable the serverfarms memory_percentage monitor | string | `true` | no | -| memory_percentage_extra_tags | Extra tags for Redis memory_percentage monitor | list | `[]` | no | +| memory_percentage_extra_tags | Extra tags for serverfarms memory_percentage monitor | list | `[]` | no | | memory_percentage_message | Custom message for serverfarm memory_percentage monitor | string | `` | no | | memory_percentage_silenced | Groups to mute for serverfarm memory_percentage monitor | map | `{}` | no | | memory_percentage_threshold_critical | Memory percentage (critical threshold) | string | `95` | no | | memory_percentage_threshold_warning | Memory percentage (warning threshold) | string | `90` | no | | memory_percentage_time_aggregator | Monitor aggregator for serverfarms memory_percentage [available values: min, max or avg] | string | `max` | no | | memory_percentage_timeframe | Monitor timeframe for serverfarms memory_percentage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| message | Message sent when a Redis monitor is triggered | string | - | yes | +| message | Message sent when a serverfarms monitor is triggered | string | - | yes | | new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | | status_enabled | Flag to enable the serverfarms status monitor | string | `true` | no | -| status_extra_tags | Extra tags for Redis status monitor | list | `[]` | no | +| status_extra_tags | Extra tags for serverfarms status monitor | list | `[]` | no | | status_message | Custom message for serverfarm status monitor | string | `` | no | | status_silenced | Groups to mute for serverfarm status monitor | map | `{}` | no | | status_time_aggregator | Monitor aggregator for serverfarms status [available values: min, max or avg] | string | `max` | no | diff --git a/cloud/azure/serverfarms/inputs.tf b/cloud/azure/serverfarms/inputs.tf index f096b72..968751d 100644 --- a/cloud/azure/serverfarms/inputs.tf +++ b/cloud/azure/serverfarms/inputs.tf @@ -6,7 +6,7 @@ variable "environment" { # Global DataDog variable "message" { - description = "Message sent when a Redis monitor is triggered" + description = "Message sent when a serverfarms monitor is triggered" } variable "evaluation_delay" { @@ -51,7 +51,7 @@ variable "status_silenced" { } variable "status_extra_tags" { - description = "Extra tags for Redis status monitor" + description = "Extra tags for serverfarms status monitor" type = "list" default = [] } @@ -88,7 +88,7 @@ variable "cpu_percentage_silenced" { } variable "cpu_percentage_extra_tags" { - description = "Extra tags for Redis cpu_percentage monitor" + description = "Extra tags for serverfarms cpu_percentage monitor" type = "list" default = [] } @@ -135,7 +135,7 @@ variable "memory_percentage_silenced" { } variable "memory_percentage_extra_tags" { - description = "Extra tags for Redis memory_percentage monitor" + description = "Extra tags for serverfarms memory_percentage monitor" type = "list" default = [] } From 71d21c0aee0bdeffb652f3180bd3fe1879fd0494 Mon Sep 17 00:00:00 2001 From: jnancel Date: Wed, 3 Oct 2018 15:53:23 +0200 Subject: [PATCH 15/15] MON-316 Change default value for aggregators to avoid alert on spikes --- cloud/azure/serverfarms/README.md | 6 +++--- cloud/azure/serverfarms/inputs.tf | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cloud/azure/serverfarms/README.md b/cloud/azure/serverfarms/README.md index 8b1c02b..11a77a4 100644 --- a/cloud/azure/serverfarms/README.md +++ b/cloud/azure/serverfarms/README.md @@ -30,7 +30,7 @@ Creates DataDog monitors with the following checks: | cpu_percentage_silenced | Groups to mute for serverfarm cpu_percentage monitor | map | `{}` | no | | cpu_percentage_threshold_critical | CPU percentage (critical threshold) | string | `95` | no | | cpu_percentage_threshold_warning | CPU percentage (warning threshold) | string | `90` | no | -| cpu_percentage_time_aggregator | Monitor aggregator for serverfarms cpu_percentage [available values: min, max or avg] | string | `max` | no | +| cpu_percentage_time_aggregator | Monitor aggregator for serverfarms cpu_percentage [available values: min, max or avg] | string | `min` | no | | cpu_percentage_timeframe | Monitor timeframe for serverfarms cpu_percentage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_10m` | no | | environment | Architecture environment | string | - | yes | | evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | @@ -42,7 +42,7 @@ Creates DataDog monitors with the following checks: | memory_percentage_silenced | Groups to mute for serverfarm memory_percentage monitor | map | `{}` | no | | memory_percentage_threshold_critical | Memory percentage (critical threshold) | string | `95` | no | | memory_percentage_threshold_warning | Memory percentage (warning threshold) | string | `90` | no | -| memory_percentage_time_aggregator | Monitor aggregator for serverfarms memory_percentage [available values: min, max or avg] | string | `max` | no | +| memory_percentage_time_aggregator | Monitor aggregator for serverfarms memory_percentage [available values: min, max or avg] | string | `min` | no | | memory_percentage_timeframe | Monitor timeframe for serverfarms memory_percentage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a serverfarms monitor is triggered | string | - | yes | | new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | @@ -50,7 +50,7 @@ Creates DataDog monitors with the following checks: | status_extra_tags | Extra tags for serverfarms status monitor | list | `[]` | no | | status_message | Custom message for serverfarm status monitor | string | `` | no | | status_silenced | Groups to mute for serverfarm status monitor | map | `{}` | no | -| status_time_aggregator | Monitor aggregator for serverfarms status [available values: min, max or avg] | string | `max` | no | +| status_time_aggregator | Monitor aggregator for serverfarms status [available values: min, max or avg] | string | `min` | no | | status_timeframe | Monitor timeframe for serverfarms status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | ## Outputs diff --git a/cloud/azure/serverfarms/inputs.tf b/cloud/azure/serverfarms/inputs.tf index 968751d..b734f63 100644 --- a/cloud/azure/serverfarms/inputs.tf +++ b/cloud/azure/serverfarms/inputs.tf @@ -59,7 +59,7 @@ variable "status_extra_tags" { variable "status_time_aggregator" { description = "Monitor aggregator for serverfarms status [available values: min, max or avg]" type = "string" - default = "max" + default = "min" } variable "status_timeframe" { @@ -96,7 +96,7 @@ variable "cpu_percentage_extra_tags" { variable "cpu_percentage_time_aggregator" { description = "Monitor aggregator for serverfarms cpu_percentage [available values: min, max or avg]" type = "string" - default = "max" + default = "min" } variable "cpu_percentage_timeframe" { @@ -143,7 +143,7 @@ variable "memory_percentage_extra_tags" { variable "memory_percentage_time_aggregator" { description = "Monitor aggregator for serverfarms memory_percentage [available values: min, max or avg]" type = "string" - default = "max" + default = "min" } variable "memory_percentage_timeframe" {