MON-77 Normalize monitors
This commit is contained in:
parent
5df915df51
commit
6c10a32ff3
@ -33,11 +33,9 @@ Inputs
|
||||
| errors_rate_thresold_warning | Errors ratio (percentage) to trigger a warning alert | string | `1` | no |
|
||||
| failed_requests_rate_thresold_critical | Failed requests ratio (percentage) to trigger the critical alert | string | `3` | no |
|
||||
| failed_requests_rate_thresold_warning | Failed requests ratio (percentage) to trigger a warning alert | string | `1` | no |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| message | Message sent when an alert is triggered | string | - | yes |
|
||||
| provider | What is the monitored provider | string | azure | no |
|
||||
| use_filter_tags | Filter the data with service tags if true | string | `true` | no |
|
||||
| subscription_id | Azure account id used as filter for monitors | string | - | yes |
|
||||
| service | What is the monitored service | string | storage | no |
|
||||
|
||||
Outputs
|
||||
-------
|
||||
|
||||
@ -4,23 +4,6 @@ variable "environment" {
|
||||
type = "string"
|
||||
}
|
||||
|
||||
variable "subscription_id" {
|
||||
description = "Azure account id used as filter for monitors"
|
||||
type = "string"
|
||||
}
|
||||
|
||||
variable "provider" {
|
||||
description = "Cloud provider which the monitor and its based metric depend on"
|
||||
type = "string"
|
||||
default = "azure"
|
||||
}
|
||||
|
||||
variable "service" {
|
||||
description = "Service monitored by this set of monitors"
|
||||
type = "string"
|
||||
default = "storage"
|
||||
}
|
||||
|
||||
# Global DataDog
|
||||
variable "message" {
|
||||
description = "Message sent when an alert is triggered"
|
||||
@ -31,11 +14,16 @@ variable "delay" {
|
||||
default = 600
|
||||
}
|
||||
|
||||
variable "use_filter_tags" {
|
||||
description = "Filter the data with service tags if true"
|
||||
variable "filter_tags_use_defaults" {
|
||||
description = "Use default filter tags convention"
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "filter_tags_custom" {
|
||||
description = "Tags used for custom filtering when filter_tags_use_defaults is false"
|
||||
default = "*"
|
||||
}
|
||||
|
||||
variable "failed_requests_rate_thresold_critical" {
|
||||
description = "Failed requests ratio (percentage) to trigger the critical alert"
|
||||
default = 3
|
||||
|
||||
@ -2,12 +2,12 @@ data "template_file" "filter" {
|
||||
template = "$${filter}"
|
||||
|
||||
vars {
|
||||
filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "subscription_id:${var.subscription_id}"}"
|
||||
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_eventhub:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
|
||||
}
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "eventhub_status" {
|
||||
name = "[${var.environment}] Event Hub status"
|
||||
name = "[${var.environment}] Event Hub status is not ok on {{name}}"
|
||||
message = "${var.message}"
|
||||
|
||||
query = <<EOF
|
||||
@ -26,11 +26,11 @@ resource "datadog_monitor" "eventhub_status" {
|
||||
new_host_delay = "${var.delay}"
|
||||
no_data_timeframe = 20
|
||||
|
||||
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||
tags = ["env:${var.environment}", "resource:eventhub", "team:azure", "provider:azure"]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "eventhub_failed_requests" {
|
||||
name = "[${var.environment}] Event Hub failed requests"
|
||||
name = "[${var.environment}] Event Hub too much failed requests on {{name}}"
|
||||
message = "${var.message}"
|
||||
|
||||
query = <<EOF
|
||||
@ -41,7 +41,7 @@ resource "datadog_monitor" "eventhub_failed_requests" {
|
||||
avg:azure.eventhub_namespaces.failed_requests{${data.template_file.filter.rendered}} by {name,resource_group,region}
|
||||
) > ${var.failed_requests_rate_thresold_critical}
|
||||
EOF
|
||||
type = "query alert"
|
||||
type = "metric alert"
|
||||
|
||||
thresholds {
|
||||
critical = "${var.failed_requests_rate_thresold_critical}"
|
||||
@ -59,11 +59,11 @@ resource "datadog_monitor" "eventhub_failed_requests" {
|
||||
new_host_delay = "${var.delay}"
|
||||
no_data_timeframe = 20
|
||||
|
||||
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||
tags = ["env:${var.environment}", "resource:eventhub", "team:azure", "provider:azure"]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "eventhub_errors" {
|
||||
name = "[${var.environment}] Event Hub errors"
|
||||
name = "[${var.environment}] Event Hub too much errors on {{name}}"
|
||||
message = "${var.message}"
|
||||
|
||||
query = <<EOF
|
||||
@ -78,7 +78,7 @@ resource "datadog_monitor" "eventhub_errors" {
|
||||
avg:azure.eventhub_namespaces.other_errors{${data.template_file.filter.rendered}} by {name,resource_group,region}
|
||||
) > ${var.errors_rate_thresold_critical}
|
||||
EOF
|
||||
type = "query alert"
|
||||
type = "metric alert"
|
||||
|
||||
thresholds {
|
||||
critical = "${var.errors_rate_thresold_critical}"
|
||||
@ -96,5 +96,5 @@ resource "datadog_monitor" "eventhub_errors" {
|
||||
new_host_delay = "${var.delay}"
|
||||
no_data_timeframe = 20
|
||||
|
||||
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||
tags = ["env:${var.environment}", "resource:eventhub", "team:azure", "provider:azure"]
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user