# Patch summary: adds a "Host unreachable" monitor to the system/generic module.
#
# Files touched by this patch:
#   system/generic/README.md
#     - adds "Host unreachable" to the list of checks, documents the three new
#       inputs (unreachable_extra_tags, unreachable_message, unreachable_silenced)
#       and the new output datadog_host_unreachable_id.
#   system/generic/inputs.tf
#     - replaces the "# Custom CPU instance specific" heading comment with
#       "# System generic specific" and declares:
#         unreachable_silenced   (map,    default {})
#         unreachable_extra_tags (list,   default [])
#         unreachable_message    (string, default "")
#   system/generic/monitors-system.tf
#     - adds resource datadog_monitor.datadog_host_unreachable, of type
#       "service check", querying "datadog.agent.up" with .last(6) and
#       count_by_status(); thresholds are ok = 1, warning = 1, critical = 5.
#     - the monitor message is coalesce(var.unreachable_message, var.message),
#       and var.unreachable_extra_tags is appended to the fixed tag list.
#   system/generic/outputs.tf
#     - exports the new monitor's id as datadog_host_unreachable_id.
#
# NOTE(review): the scope passed to .over comes from
#   module.filter-tags.service_check, which is not defined in this patch;
#   whether the resulting query alerts per host or on an aggregate cannot be
#   told from here -- TODO confirm against the filter-tags module.
# NOTE(review): notify_no_data = true is set but no no_data_timeframe is given
#   in this resource -- confirm the provider/API default is the intended window.
diff --git a/system/generic/README.md b/system/generic/README.md index b3b450c..ea6c8d4 100644 --- a/system/generic/README.md +++ b/system/generic/README.md @@ -16,6 +16,7 @@ module "datadog-monitors-system-generic" { Creates DataDog monitors with the following checks: +- Host unreachable - CPU usage - CPU load 5 - Free disk space @@ -81,6 +82,9 @@ Creates DataDog monitors with the following checks: | free_memory_timeframe | Monitor timeframe for Free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered | string | - | yes | | new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | +| unreachable_extra_tags | Extra tags for Host unreachable monitor | list | `` | no | +| unreachable_message | Custom message for Host unreachable monitor | string | `` | no | +| unreachable_silenced | Groups to mute for Host unreachable monitor | map | `` | no | ## Outputs @@ -91,6 +95,7 @@ Creates DataDog monitors with the following checks: | datadog_free_disk_space_inodes_too_low_id | id for monitor datadog_free_disk_space_inodes_too_low | | datadog_free_disk_space_too_low_id | id for monitor datadog_free_disk_space_too_low | | datadog_free_memory_id | id for monitor datadog_free_memory | +| datadog_host_unreachable_id | id for monitor datadog_host_unreachable | | datadog_load_too_high_id | id for monitor datadog_load_too_high | ## Related documentation diff --git a/system/generic/inputs.tf b/system/generic/inputs.tf index 87b1bd8..249a6b7 100644 --- a/system/generic/inputs.tf +++ b/system/generic/inputs.tf @@ -29,7 +29,25 @@ variable "filter_tags_custom" { default = "*" } -# Custom CPU instance specific +# System generic specific + +variable "unreachable_silenced" { + description = "Groups to mute for Host unreachable monitor" + type = "map" + default = {} +} + +variable "unreachable_extra_tags" { + description = "Extra tags for 
Host unreachable monitor" + type = "list" + default = [] +} + +variable "unreachable_message" { + description = "Custom message for Host unreachable monitor" + type = "string" + default = "" +} variable "cpu_high_silenced" { description = "Groups to mute for CPU high monitor" diff --git a/system/generic/monitors-system.tf b/system/generic/monitors-system.tf index 89db0c4..f113d8d 100644 --- a/system/generic/monitors-system.tf +++ b/system/generic/monitors-system.tf @@ -1,3 +1,30 @@ +resource "datadog_monitor" "datadog_host_unreachable" { + name = "[${var.environment}] Host unreachable" + message = "${coalesce(var.unreachable_message, var.message)}" + + query = "\"datadog.agent.up\".over${module.filter-tags.service_check}.last(6).count_by_status()" + + type = "service check" + + thresholds { + ok = 1 + warning = 1 + critical = 5 + } + + notify_no_data = true + new_host_delay = "${var.new_host_delay}" + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + + silenced = "${var.unreachable_silenced}" + + tags = ["env:${var.environment}", "type:system", "provider:host", "resource:generic", "team:claranet", "created-by:terraform", "${var.unreachable_extra_tags}"] +} + resource "datadog_monitor" "datadog_cpu_too_high" { name = "[${var.environment}] CPU usage {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cpu_high_message, var.message)}" diff --git a/system/generic/outputs.tf b/system/generic/outputs.tf index 85492d8..e870cda 100644 --- a/system/generic/outputs.tf +++ b/system/generic/outputs.tf @@ -1,3 +1,8 @@ +output "datadog_host_unreachable_id" { + description = "id for monitor datadog_host_unreachable" + value = "${datadog_monitor.datadog_host_unreachable.id}" +} + output "datadog_cpu_too_high_id" { description = "id for monitor datadog_cpu_too_high" value = 
"${datadog_monitor.datadog_cpu_too_high.id}"