diff --git a/cloud/azure/eventhub/README.md b/cloud/azure/eventhub/README.md new file mode 100644 index 0000000..b2573da --- /dev/null +++ b/cloud/azure/eventhub/README.md @@ -0,0 +1,54 @@ +Event Hub Datadog monitor +========================= + +How to use this module +---------------------- + +``` +module "datadog-monitors-azure-eventhub" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/eventhub?ref={revision}" + + message = "${module.datadog-message-alerting.alerting-message}" + environment = "${var.environment}" + subscription_id = "${var.subscription_id}" +} +``` + +Purpose +------- +Creates Datadog monitors with the following checks: + +* Service status check +* Failed requests ratio +* Erroneous requests ratio + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| delay | Delay in seconds for the metric evaluation | string | `600` | no | +| environment | Architecture environment | string | - | yes | +| errors_rate_thresold_critical | Errors ratio (percentage) to trigger the critical alert | string | `3` | no | +| errors_rate_thresold_warning | Errors ratio (percentage) to trigger a warning alert | string | `1` | no | +| failed_requests_rate_thresold_critical | Failed requests ratio (percentage) to trigger the critical alert | string | `3` | no | +| failed_requests_rate_thresold_warning | Failed requests ratio (percentage) to trigger a warning alert | string | `1` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when an alert is triggered | string | - | yes | + +Outputs +------- + +| Name | Description | +|------|-------------| +| errors_monitor_id | Id of the `errors` monitor | +| failed_requests_monitor_id | Id of the `failed requests` monitor | +| 
status_monitor_id | Id of the `status` monitor | + +Related documentation +--------------------- + +Datadog documentation : [https://docs.datadoghq.com/integrations/azure_event_hub/](https://docs.datadoghq.com/integrations/azure_event_hub/) + +Azure metrics documentation : [https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-metrics-azure-monitor](https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-metrics-azure-monitor) diff --git a/cloud/azure/eventhub/inputs.tf b/cloud/azure/eventhub/inputs.tf new file mode 100644 index 0000000..b41fdf5 --- /dev/null +++ b/cloud/azure/eventhub/inputs.tf @@ -0,0 +1,54 @@ +# Global Terraform +variable "environment" { + description = "Architecture environment" + type = "string" +} + +# Global DataDog +variable "message" { + description = "Message sent when an alert is triggered" + type = "string" +} + +variable "delay" { + description = "Delay in seconds for the metric evaluation" + type = "string" + default = 600 +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + type = "string" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + type = "string" + default = "*" +} + +# Azure Event Hub specific +variable "failed_requests_rate_thresold_critical" { + description = "Failed requests ratio (percentage) to trigger the critical alert" + type = "string" + default = 3 +} + +variable "failed_requests_rate_thresold_warning" { + description = "Failed requests ratio (percentage) to trigger a warning alert" + type = "string" + default = 1 +} + +variable "errors_rate_thresold_critical" { + description = "Errors ratio (percentage) to trigger the critical alert" + type = "string" + default = 3 +} + +variable "errors_rate_thresold_warning" { + description = "Errors ratio (percentage) to trigger a warning alert" + type = "string" + default = 1 +} diff --git a/cloud/azure/eventhub/monitors-eventhub.tf b/cloud/azure/eventhub/monitors-eventhub.tf new file mode 100644 index 0000000..ff52507 --- /dev/null +++ b/cloud/azure/eventhub/monitors-eventhub.tf @@ -0,0 
+1,100 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_eventhub:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + } +} + +resource "datadog_monitor" "eventhub_status" { + name = "[${var.environment}] Event Hub status is not ok on {{name}}" + message = "${var.message}" + + query = < ${var.failed_requests_rate_thresold_critical} + EOF + type = "metric alert" + + thresholds { + critical = "${var.failed_requests_rate_thresold_critical}" + warning = "${var.failed_requests_rate_thresold_warning}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 + + tags = ["env:${var.environment}", "resource:eventhub", "team:azure", "provider:azure"] +} + +resource "datadog_monitor" "eventhub_errors" { + name = "[${var.environment}] Event Hub too much errors on {{name}}" + message = "${var.message}" + + query = < ${var.errors_rate_thresold_critical} + EOF + type = "metric alert" + + thresholds { + critical = "${var.errors_rate_thresold_critical}" + warning = "${var.errors_rate_thresold_warning}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 + + tags = ["env:${var.environment}", "resource:eventhub", "team:azure", "provider:azure"] +} diff --git a/cloud/azure/eventhub/outputs.tf b/cloud/azure/eventhub/outputs.tf new file mode 100644 index 0000000..b9d1822 --- /dev/null +++ b/cloud/azure/eventhub/outputs.tf @@ -0,0 +1,12 @@ +# Each output returns the id of the datadog_monitor resource matching its name +output "status_monitor_id" { + value = "${datadog_monitor.eventhub_status.id}" +} + +output 
"failed_requests_monitor_id" { + value = "${datadog_monitor.eventhub_failed_requests.id}" +} + +output "errors_monitor_id" { + value = "${datadog_monitor.eventhub_errors.id}" +}