From d2e1aa5efddea62258790c9b0afff8dea0d51cf4 Mon Sep 17 00:00:00 2001 From: Laurent Piroelle Date: Mon, 30 Oct 2017 11:40:16 +0100 Subject: [PATCH 1/9] MON-77 Azure Event Hub monitors --- cloud/azure/eventhub/inputs.tf | 31 ++++++++ cloud/azure/eventhub/monitors-eventhub.tf | 86 +++++++++++++++++++++++ cloud/azure/eventhub/outputs.tf | 11 +++ 3 files changed, 128 insertions(+) create mode 100644 cloud/azure/eventhub/inputs.tf create mode 100644 cloud/azure/eventhub/monitors-eventhub.tf create mode 100644 cloud/azure/eventhub/outputs.tf diff --git a/cloud/azure/eventhub/inputs.tf b/cloud/azure/eventhub/inputs.tf new file mode 100644 index 0000000..a1c7ec4 --- /dev/null +++ b/cloud/azure/eventhub/inputs.tf @@ -0,0 +1,31 @@ +variable "environment" {} + +variable "down_message" {} + +variable "failed_requests_message" {} + +variable "errors_message" {} + +variable "delay" { + default = 600 +} + +variable "failed_requests_rate_thresold_critical" { + default = 5 +} + +variable "failed_requests_rate_thresold_warning" { + default = 3 +} + +variable "errors_rate_thresold_critical" { + default = 5 +} + +variable "errors_rate_thresold_warning" { + default = 3 +} + +variable "use_filter_tags" { + default = "true" +} diff --git a/cloud/azure/eventhub/monitors-eventhub.tf b/cloud/azure/eventhub/monitors-eventhub.tf new file mode 100644 index 0000000..7c22418 --- /dev/null +++ b/cloud/azure/eventhub/monitors-eventhub.tf @@ -0,0 +1,86 @@ +resource "datadog_monitor" "eventhub_status" { + name = "[${var.environment}] Event Hub status" + message = "${var.down_message}" + + query = < ${var.failed_requests_rate_thresold_critical} + EOF + type = "query alert" + + thresholds { + critical = "${var.failed_requests_rate_thresold_critical}" + warning = "${var.failed_requests_rate_thresold_warning}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 60 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "eventhub_errors" { + name = "[${var.environment}] Event Hub errors" + message = "${var.errors_message}" + + query = < ${var.errors_rate_thresold_critical} + EOF + type = "query alert" + + thresholds { + critical = "${var.errors_rate_thresold_critical}" + warning = "${var.errors_rate_thresold_warning}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 60 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} diff --git a/cloud/azure/eventhub/outputs.tf b/cloud/azure/eventhub/outputs.tf new file mode 100644 index 0000000..b9d1822 --- /dev/null +++ b/cloud/azure/eventhub/outputs.tf @@ -0,0 +1,11 @@ +output "status_monitor_id" { + value = "${datadog_monitor.eventhub_failed_requests.id}" +} + +output "failed_requests_monitor_id" { + value = "${datadog_monitor.eventhub_status.id}" +} + +output "errors_monitor_id" { + value = "${datadog_monitor.eventhub_errors.id}" +} From 15549efc52e50e03a0b2d5165bdf41a121607947 Mon Sep 17 00:00:00 2001 From: Laurent Piroelle Date: Mon, 30 Oct 2017 17:49:02 +0100 Subject: [PATCH 2/9] MON-77 Use data template for tag filter --- cloud/azure/eventhub/monitors-eventhub.tf | 31 +++++++++++++++-------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/cloud/azure/eventhub/monitors-eventhub.tf b/cloud/azure/eventhub/monitors-eventhub.tf index 7c22418..71b97b3 100644 --- a/cloud/azure/eventhub/monitors-eventhub.tf +++ b/cloud/azure/eventhub/monitors-eventhub.tf @@ -1,9 +1,18 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_eventhub:enabled,env:%s", var.environment) : "*"}" + } +} + + resource "datadog_monitor" "eventhub_status" { name = "[${var.environment}] Event Hub status" message = "${var.down_message}" query = < ${var.failed_requests_rate_thresold_critical} EOF type = "query alert" @@ -56,14 +65,14 @@ resource "datadog_monitor" "eventhub_errors" { query = < ${var.errors_rate_thresold_critical} EOF type = "query alert" From 3330aeb9dcb6574deea57d671fcf5faa9cfa528e Mon Sep 17 00:00:00 2001 From: Laurent Piroelle Date: Mon, 30 Oct 2017 18:00:12 +0100 Subject: [PATCH 3/9] MON-77 Fix tag filters --- cloud/azure/eventhub/monitors-eventhub.tf | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cloud/azure/eventhub/monitors-eventhub.tf b/cloud/azure/eventhub/monitors-eventhub.tf index 71b97b3..2b67590 100644 --- a/cloud/azure/eventhub/monitors-eventhub.tf +++ b/cloud/azure/eventhub/monitors-eventhub.tf @@ -12,7 +12,7 @@ resource "datadog_monitor" "eventhub_status" { message = "${var.down_message}" query = < ${var.failed_requests_rate_thresold_critical} EOF type = "query alert" @@ -65,14 +65,14 @@ resource "datadog_monitor" "eventhub_errors" { query = < ${var.errors_rate_thresold_critical} EOF type = "query alert" From 1768c1621f7fbcce64464c5aa4f19bd217fae538 Mon Sep 17 00:00:00 2001 From: Laurent Piroelle Date: Mon, 30 Oct 2017 19:05:40 +0100 Subject: [PATCH 4/9] MON-77 Change monitor type to to fix it --- cloud/azure/eventhub/monitors-eventhub.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/azure/eventhub/monitors-eventhub.tf b/cloud/azure/eventhub/monitors-eventhub.tf index 2b67590..7600215 100644 --- a/cloud/azure/eventhub/monitors-eventhub.tf +++ b/cloud/azure/eventhub/monitors-eventhub.tf @@ -14,7 +14,7 @@ resource "datadog_monitor" "eventhub_status" { query = < Date: Tue, 31 Oct 2017 08:51:34 +0100 Subject: [PATCH 5/9] MON-77 Some documentation & lower thresold levels --- cloud/azure/eventhub/README.md | 53 +++++++++++++++++++++++ cloud/azure/eventhub/inputs.tf | 26 ++++++----- cloud/azure/eventhub/monitors-eventhub.tf | 12 ++--- 3 files changed, 74 insertions(+), 17 deletions(-) create mode 100644 cloud/azure/eventhub/README.md diff --git a/cloud/azure/eventhub/README.md b/cloud/azure/eventhub/README.md new file mode 100644 index 0000000..a148377 --- /dev/null +++ b/cloud/azure/eventhub/README.md @@ -0,0 +1,53 @@ +Event Hub Datadog monitor +========================= + +How to use this module +---------------------- + +``` +module "datadog-monitors-azure-eventhub" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/eventhub?ref={revision}" + + message = "${module.datadog-message-alerting.alerting-message}" + + environment = "${var.environment}" +} +``` + +Purpose +------- +Creates a Datadog monitor with the following checks : + +* Service status check +* Failed request ratio +* Erroneous requests ratio + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| delay | Delay in seconds for the metric evaluation | string | `600` | no | +| environment | Architecture environment | string | - | yes | +| errors_rate_thresold_critical | Errors ratio (percentage) to trigger the critical alert | string | `3` | no | +| errors_rate_thresold_warning | Errors ratio (percentage) to trigger a warning alert | string | `1` | no | +| failed_requests_rate_thresold_critical | Failed requests ratio (percentage) to trigger the critical alert | string | `3` | no | +| failed_requests_rate_thresold_warning | Failed requests ratio (percentage) to trigger a warning alert | string | `1` | no | +| message | Message sent when an alert is triggered | string | - | yes | +| use_filter_tags | Filter the data with service tags if true | string | `true` | no | + +Outputs +------- + +| Name | Description | +|------|-------------| +| errors_monitor_id | Id of the `errors` monitor | +| failed_requests_monitor_id | Id of the `failed requests` monitor | +| status_monitor_id | Id of the `status` monitor | + +Related documentation +--------------------- + +Datadog documentation : [https://docs.datadoghq.com/integrations/azure_event_hub/](https://docs.datadoghq.com/integrations/azure_event_hub/) + +Azure metrics documentation : [https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-metrics-azure-monitor](https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-metrics-azure-monitor) diff --git a/cloud/azure/eventhub/inputs.tf b/cloud/azure/eventhub/inputs.tf index a1c7ec4..a67caae 100644 --- a/cloud/azure/eventhub/inputs.tf +++ b/cloud/azure/eventhub/inputs.tf @@ -1,31 +1,35 @@ variable "environment" {} -variable "down_message" {} - -variable "failed_requests_message" {} - -variable "errors_message" {} +variable "message" { + description = "Message sent when an alert is triggered" +} variable "delay" { + description = "Delay in seconds for the metric evaluation" default = 600 } variable "failed_requests_rate_thresold_critical" { - default = 5 + description = "Failed requests ratio (percentage) to trigger the critical alert" + default = 3 } variable "failed_requests_rate_thresold_warning" { - default = 3 + description = "Failed requests ratio (percentage) to trigger a warning alert" + default = 1 } variable "errors_rate_thresold_critical" { - default = 5 -} - -variable "errors_rate_thresold_warning" { + description = "Errors ratio (percentage) to trigger the critical alert" default = 3 } +variable "errors_rate_thresold_warning" { + description = "Errors ratio (percentage) to trigger a warning alert" + default = 1 +} + variable "use_filter_tags" { + description = "Filter the data with service tags if true" default = "true" } diff --git a/cloud/azure/eventhub/monitors-eventhub.tf b/cloud/azure/eventhub/monitors-eventhub.tf index 7600215..efe1351 100644 --- a/cloud/azure/eventhub/monitors-eventhub.tf +++ b/cloud/azure/eventhub/monitors-eventhub.tf @@ -9,7 +9,7 @@ data "template_file" "filter" { resource "datadog_monitor" "eventhub_status" { name = "[${var.environment}] Event Hub status" - message = "${var.down_message}" + message = "${var.message}" query = < Date: Fri, 3 Nov 2017 20:41:57 +0100 Subject: [PATCH 6/9] MON-77 add tags and subscription_id --- cloud/azure/eventhub/README.md | 5 +++- cloud/azure/eventhub/inputs.tf | 34 +++++++++++++++++++---- cloud/azure/eventhub/monitors-eventhub.tf | 11 ++++++-- 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/cloud/azure/eventhub/README.md b/cloud/azure/eventhub/README.md index a148377..6e40955 100644 --- a/cloud/azure/eventhub/README.md +++ b/cloud/azure/eventhub/README.md @@ -9,8 +9,8 @@ module "datadog-monitors-azure-eventhub" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/eventhub?ref={revision}" message = "${module.datadog-message-alerting.alerting-message}" - environment = "${var.environment}" + subscription_id = "${var.subscription_id}" } ``` @@ -29,12 +29,15 @@ Inputs |------|-------------|:----:|:-----:|:-----:| | delay | Delay in seconds for the metric evaluation | string | `600` | no | | environment | Architecture environment | string | - | yes | +| provider | What is the monitored provider | string | - | yes | +| service | What is the monitored service | string | - | yes | | errors_rate_thresold_critical | Errors ratio (percentage) to trigger the critical alert | string | `3` | no | | errors_rate_thresold_warning | Errors ratio (percentage) to trigger a warning alert | string | `1` | no | | failed_requests_rate_thresold_critical | Failed requests ratio (percentage) to trigger the critical alert | string | `3` | no | | failed_requests_rate_thresold_warning | Failed requests ratio (percentage) to trigger a warning alert | string | `1` | no | | message | Message sent when an alert is triggered | string | - | yes | | use_filter_tags | Filter the data with service tags if true | string | `true` | no | +| subscription_id | Azure account id used as filter for monitors | string | - | yes | Outputs ------- diff --git a/cloud/azure/eventhub/inputs.tf b/cloud/azure/eventhub/inputs.tf index a67caae..d520dc2 100644 --- a/cloud/azure/eventhub/inputs.tf +++ b/cloud/azure/eventhub/inputs.tf @@ -1,5 +1,27 @@ -variable "environment" {} +# Global Terraform +variable "environment" { + description = "Architecture environment" + type = "string" +} +variable "subscription_id" { + description = "Azure account id used as filter for monitors" + type = "string" +} + +variable "provider" { + description = "Cloud provider which the monitor and its based metric depend on" + type = "string" + default = "azure" +} + +variable "service" { + description = "Service monitored by this set of monitors" + type = "string" + default = "storage" +} + +# Global DataDog variable "message" { description = "Message sent when an alert is triggered" } @@ -9,6 +31,11 @@ variable "delay" { default = 600 } +variable "use_filter_tags" { + description = "Filter the data with service tags if true" + default = "true" +} + variable "failed_requests_rate_thresold_critical" { description = "Failed requests ratio (percentage) to trigger the critical alert" default = 3 @@ -28,8 +55,3 @@ variable "errors_rate_thresold_warning" { description = "Errors ratio (percentage) to trigger a warning alert" default = 1 } - -variable "use_filter_tags" { - description = "Filter the data with service tags if true" - default = "true" -} diff --git a/cloud/azure/eventhub/monitors-eventhub.tf b/cloud/azure/eventhub/monitors-eventhub.tf index efe1351..89a3d8a 100644 --- a/cloud/azure/eventhub/monitors-eventhub.tf +++ b/cloud/azure/eventhub/monitors-eventhub.tf @@ -2,11 +2,10 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_eventhub:enabled,env:%s", var.environment) : "*"}" + filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "subscription_id:${var.subscription_id}"}" } } - resource "datadog_monitor" "eventhub_status" { name = "[${var.environment}] Event Hub status" message = "${var.message}" @@ -26,6 +25,8 @@ resource "datadog_monitor" "eventhub_status" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "eventhub_failed_requests" { @@ -57,6 +58,8 @@ resource "datadog_monitor" "eventhub_failed_requests" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "eventhub_errors" { @@ -91,5 +94,7 @@ resource "datadog_monitor" "eventhub_errors" { locked = false require_full_window = true new_host_delay = "${var.delay}" - no_data_timeframe = 20 + no_data_timeframe = 20o + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } From 205f3e963596dee548183e5a34ec081ab5e6df08 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Fri, 3 Nov 2017 20:48:42 +0100 Subject: [PATCH 7/9] MON-77 update readme --- cloud/azure/eventhub/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloud/azure/eventhub/README.md b/cloud/azure/eventhub/README.md index 6e40955..f4db2d6 100644 --- a/cloud/azure/eventhub/README.md +++ b/cloud/azure/eventhub/README.md @@ -29,15 +29,15 @@ Inputs |------|-------------|:----:|:-----:|:-----:| | delay | Delay in seconds for the metric evaluation | string | `600` | no | | environment | Architecture environment | string | - | yes | -| provider | What is the monitored provider | string | - | yes | -| service | What is the monitored service | string | - | yes | | errors_rate_thresold_critical | Errors ratio (percentage) to trigger the critical alert | string | `3` | no | | errors_rate_thresold_warning | Errors ratio (percentage) to trigger a warning alert | string | `1` | no | | failed_requests_rate_thresold_critical | Failed requests ratio (percentage) to trigger the critical alert | string | `3` | no | | failed_requests_rate_thresold_warning | Failed requests ratio (percentage) to trigger a warning alert | string | `1` | no | | message | Message sent when an alert is triggered | string | - | yes | +| provider | What is the monitored provider | string | azure | no | | use_filter_tags | Filter the data with service tags if true | string | `true` | no | | subscription_id | Azure account id used as filter for monitors | string | - | yes | +| service | What is the monitored service | string | storage | no | Outputs ------- From 5df915df51e3f0d17badc0c38b9e6e76770e80fe Mon Sep 17 00:00:00 2001 From: Laurent Piroelle Date: Thu, 23 Nov 2017 16:36:18 +0100 Subject: [PATCH 8/9] MON-77 Fix unattended char --- cloud/azure/eventhub/monitors-eventhub.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/azure/eventhub/monitors-eventhub.tf b/cloud/azure/eventhub/monitors-eventhub.tf index 89a3d8a..733e141 100644 --- a/cloud/azure/eventhub/monitors-eventhub.tf +++ b/cloud/azure/eventhub/monitors-eventhub.tf @@ -94,7 +94,7 @@ resource "datadog_monitor" "eventhub_errors" { locked = false require_full_window = true new_host_delay = "${var.delay}" - no_data_timeframe = 20o + no_data_timeframe = 20 tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } From 6c10a32ff3303db46f8da3b746a9f1df3a0b35ae Mon Sep 17 00:00:00 2001 From: Laurent Piroelle Date: Thu, 23 Nov 2017 16:50:04 +0100 Subject: [PATCH 9/9] MON-77 Normalize monitors --- cloud/azure/eventhub/README.md | 6 ++---- cloud/azure/eventhub/inputs.tf | 26 ++++++----------------- cloud/azure/eventhub/monitors-eventhub.tf | 18 ++++++++-------- 3 files changed, 18 insertions(+), 32 deletions(-) diff --git a/cloud/azure/eventhub/README.md b/cloud/azure/eventhub/README.md index f4db2d6..b2573da 100644 --- a/cloud/azure/eventhub/README.md +++ b/cloud/azure/eventhub/README.md @@ -33,11 +33,9 @@ Inputs | errors_rate_thresold_warning | Errors ratio (percentage) to trigger a warning alert | string | `1` | no | | failed_requests_rate_thresold_critical | Failed requests ratio (percentage) to trigger the critical alert | string | `3` | no | | failed_requests_rate_thresold_warning | Failed requests ratio (percentage) to trigger a warning alert | string | `1` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | -| provider | What is the monitored provider | string | azure | no | -| use_filter_tags | Filter the data with service tags if true | string | `true` | no | -| subscription_id | Azure account id used as filter for monitors | string | - | yes | -| service | What is the monitored service | string | storage | no | Outputs ------- diff --git a/cloud/azure/eventhub/inputs.tf b/cloud/azure/eventhub/inputs.tf index d520dc2..b41fdf5 100644 --- a/cloud/azure/eventhub/inputs.tf +++ b/cloud/azure/eventhub/inputs.tf @@ -4,23 +4,6 @@ variable "environment" { type = "string" } -variable "subscription_id" { - description = "Azure account id used as filter for monitors" - type = "string" -} - -variable "provider" { - description = "Cloud provider which the monitor and its based metric depend on" - type = "string" - default = "azure" -} - -variable "service" { - description = "Service monitored by this set of monitors" - type = "string" - default = "storage" -} - # Global DataDog variable "message" { description = "Message sent when an alert is triggered" @@ -31,11 +14,16 @@ variable "delay" { default = 600 } -variable "use_filter_tags" { - description = "Filter the data with service tags if true" +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" default = "true" } +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + variable "failed_requests_rate_thresold_critical" { description = "Failed requests ratio (percentage) to trigger the critical alert" default = 3 diff --git a/cloud/azure/eventhub/monitors-eventhub.tf b/cloud/azure/eventhub/monitors-eventhub.tf index 733e141..ff52507 100644 --- a/cloud/azure/eventhub/monitors-eventhub.tf +++ b/cloud/azure/eventhub/monitors-eventhub.tf @@ -2,12 +2,12 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "subscription_id:${var.subscription_id}"}" + filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_eventhub:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } resource "datadog_monitor" "eventhub_status" { - name = "[${var.environment}] Event Hub status" + name = "[${var.environment}] Event Hub status is not ok on {{name}}" message = "${var.message}" query = < ${var.failed_requests_rate_thresold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { critical = "${var.failed_requests_rate_thresold_critical}" @@ -59,11 +59,11 @@ resource "datadog_monitor" "eventhub_failed_requests" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:eventhub", "team:azure", "provider:azure"] } resource "datadog_monitor" "eventhub_errors" { - name = "[${var.environment}] Event Hub errors" + name = "[${var.environment}] Event Hub too much errors on {{name}}" message = "${var.message}" query = < ${var.errors_rate_thresold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { critical = "${var.errors_rate_thresold_critical}" @@ -96,5 +96,5 @@ resource "datadog_monitor" "eventhub_errors" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:eventhub", "team:azure", "provider:azure"] }