MON-77 Some documentation & lower threshold levels
This commit is contained in:
parent
1768c1621f
commit
0f22d51e9c
53
cloud/azure/eventhub/README.md
Normal file
53
cloud/azure/eventhub/README.md
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
Event Hub Datadog monitor
|
||||||
|
=========================
|
||||||
|
|
||||||
|
How to use this module
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
```
|
||||||
|
module "datadog-monitors-azure-eventhub" {
|
||||||
|
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/eventhub?ref={revision}"
|
||||||
|
|
||||||
|
message = "${module.datadog-message-alerting.alerting-message}"
|
||||||
|
|
||||||
|
environment = "${var.environment}"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Purpose
|
||||||
|
-------
|
||||||
|
Creates Datadog monitors with the following checks:
|
||||||
|
|
||||||
|
* Service status check
|
||||||
|
* Failed request ratio
|
||||||
|
* Erroneous requests ratio
|
||||||
|
|
||||||
|
Inputs
|
||||||
|
------
|
||||||
|
|
||||||
|
| Name | Description | Type | Default | Required |
|
||||||
|
|------|-------------|:----:|:-----:|:-----:|
|
||||||
|
| delay | Delay in seconds for the metric evaluation | string | `600` | no |
|
||||||
|
| environment | Architecture environment | string | - | yes |
|
||||||
|
| errors_rate_thresold_critical | Errors ratio (percentage) to trigger the critical alert | string | `3` | no |
|
||||||
|
| errors_rate_thresold_warning | Errors ratio (percentage) to trigger a warning alert | string | `1` | no |
|
||||||
|
| failed_requests_rate_thresold_critical | Failed requests ratio (percentage) to trigger the critical alert | string | `3` | no |
|
||||||
|
| failed_requests_rate_thresold_warning | Failed requests ratio (percentage) to trigger a warning alert | string | `1` | no |
|
||||||
|
| message | Message sent when an alert is triggered | string | - | yes |
|
||||||
|
| use_filter_tags | Filter the data with service tags if true | string | `true` | no |
|
||||||
|
|
||||||
|
Outputs
|
||||||
|
-------
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
|------|-------------|
|
||||||
|
| errors_monitor_id | Id of the `errors` monitor |
|
||||||
|
| failed_requests_monitor_id | Id of the `failed requests` monitor |
|
||||||
|
| status_monitor_id | Id of the `status` monitor |
|
||||||
|
|
||||||
|
Related documentation
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
Datadog documentation: [https://docs.datadoghq.com/integrations/azure_event_hub/](https://docs.datadoghq.com/integrations/azure_event_hub/)
|
||||||
|
|
||||||
|
Azure metrics documentation: [https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-metrics-azure-monitor](https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-metrics-azure-monitor)
|
||||||
@ -1,31 +1,35 @@
|
|||||||
variable "environment" {}
|
variable "environment" {}
|
||||||
|
|
||||||
variable "down_message" {}
|
variable "message" {
|
||||||
|
description = "Message sent when an alert is triggered"
|
||||||
variable "failed_requests_message" {}
|
}
|
||||||
|
|
||||||
variable "errors_message" {}
|
|
||||||
|
|
||||||
variable "delay" {
|
variable "delay" {
|
||||||
|
description = "Delay in seconds for the metric evaluation"
|
||||||
default = 600
|
default = 600
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "failed_requests_rate_thresold_critical" {
|
variable "failed_requests_rate_thresold_critical" {
|
||||||
default = 5
|
description = "Failed requests ratio (percentage) to trigger the critical alert"
|
||||||
|
default = 3
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "failed_requests_rate_thresold_warning" {
|
variable "failed_requests_rate_thresold_warning" {
|
||||||
default = 3
|
description = "Failed requests ratio (percentage) to trigger a warning alert"
|
||||||
|
default = 1
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "errors_rate_thresold_critical" {
|
variable "errors_rate_thresold_critical" {
|
||||||
default = 5
|
description = "Errors ratio (percentage) to trigger the critical alert"
|
||||||
}
|
|
||||||
|
|
||||||
variable "errors_rate_thresold_warning" {
|
|
||||||
default = 3
|
default = 3
|
||||||
}
|
}
|
||||||
|
|
||||||
|
variable "errors_rate_thresold_warning" {
|
||||||
|
description = "Errors ratio (percentage) to trigger a warning alert"
|
||||||
|
default = 1
|
||||||
|
}
|
||||||
|
|
||||||
variable "use_filter_tags" {
|
variable "use_filter_tags" {
|
||||||
|
description = "Filter the data with service tags if true"
|
||||||
default = "true"
|
default = "true"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -9,7 +9,7 @@ data "template_file" "filter" {
|
|||||||
|
|
||||||
resource "datadog_monitor" "eventhub_status" {
|
resource "datadog_monitor" "eventhub_status" {
|
||||||
name = "[${var.environment}] Event Hub status"
|
name = "[${var.environment}] Event Hub status"
|
||||||
message = "${var.down_message}"
|
message = "${var.message}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(last_5m): avg:azure.eventhub_namespaces.status{${data.template_file.filter.rendered}} by {name,resource_group,region} != 1
|
avg(last_5m): avg:azure.eventhub_namespaces.status{${data.template_file.filter.rendered}} by {name,resource_group,region} != 1
|
||||||
@ -18,7 +18,7 @@ resource "datadog_monitor" "eventhub_status" {
|
|||||||
|
|
||||||
notify_no_data = true
|
notify_no_data = true
|
||||||
evaluation_delay = "${var.delay}"
|
evaluation_delay = "${var.delay}"
|
||||||
renotify_interval = 60
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
timeout_h = 0
|
timeout_h = 0
|
||||||
include_tags = true
|
include_tags = true
|
||||||
@ -30,7 +30,7 @@ resource "datadog_monitor" "eventhub_status" {
|
|||||||
|
|
||||||
resource "datadog_monitor" "eventhub_failed_requests" {
|
resource "datadog_monitor" "eventhub_failed_requests" {
|
||||||
name = "[${var.environment}] Event Hub failed requests"
|
name = "[${var.environment}] Event Hub failed requests"
|
||||||
message = "${var.failed_requests_message}"
|
message = "${var.message}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(last_5m): (
|
avg(last_5m): (
|
||||||
@ -49,7 +49,7 @@ resource "datadog_monitor" "eventhub_failed_requests" {
|
|||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = false
|
||||||
evaluation_delay = "${var.delay}"
|
evaluation_delay = "${var.delay}"
|
||||||
renotify_interval = 60
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
timeout_h = 0
|
timeout_h = 0
|
||||||
include_tags = true
|
include_tags = true
|
||||||
@ -61,7 +61,7 @@ resource "datadog_monitor" "eventhub_failed_requests" {
|
|||||||
|
|
||||||
resource "datadog_monitor" "eventhub_errors" {
|
resource "datadog_monitor" "eventhub_errors" {
|
||||||
name = "[${var.environment}] Event Hub errors"
|
name = "[${var.environment}] Event Hub errors"
|
||||||
message = "${var.errors_message}"
|
message = "${var.message}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(last_5m): (
|
avg(last_5m): (
|
||||||
@ -84,7 +84,7 @@ resource "datadog_monitor" "eventhub_errors" {
|
|||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = false
|
||||||
evaluation_delay = "${var.delay}"
|
evaluation_delay = "${var.delay}"
|
||||||
renotify_interval = 60
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
timeout_h = 0
|
timeout_h = 0
|
||||||
include_tags = true
|
include_tags = true
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user