diff --git a/README.md b/README.md index 92714a9..37b9581 100644 --- a/README.md +++ b/README.md @@ -116,3 +116,4 @@ The `//` is very important, it's a terraform specific syntax used to separate gi - [php-fpm](https://bitbucket.org/morea/terraform.feature.datadog/src/master/middleware/php-fpm/) - [system](https://bitbucket.org/morea/terraform.feature.datadog/src/master/system/) - [generic](https://bitbucket.org/morea/terraform.feature.datadog/src/master/system/generic/) + - [unreachable](https://bitbucket.org/morea/terraform.feature.datadog/src/master/system/unreachable/) diff --git a/system/generic/README.md b/system/generic/README.md index b2f238a..a23094d 100644 --- a/system/generic/README.md +++ b/system/generic/README.md @@ -22,7 +22,6 @@ Creates DataDog monitors with the following checks: - Free disk inodes - Free disk space - Free memory -- Host unreachable ## Inputs @@ -88,11 +87,6 @@ Creates DataDog monitors with the following checks: | free_memory_timeframe | Monitor timeframe for Free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered | string | - | yes | | new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | -| unreachable_enabled | Flag to enable Host unreachable monitor | string | `true` | no | -| unreachable_extra_tags | Extra tags for Host unreachable monitor | list | `` | no | -| unreachable_message | Custom message for Host unreachable monitor | string | `` | no | -| unreachable_no_data_timeframe | Timeframe for Host unreachable monitor to alert on no data | string | `20` | no | -| unreachable_silenced | Groups to mute for Host unreachable monitor | map | `` | no | ## Outputs @@ -103,7 +97,6 @@ Creates DataDog monitors with the following checks: | datadog_free_disk_space_inodes_too_low_id | id for monitor datadog_free_disk_space_inodes_too_low | | datadog_free_disk_space_too_low_id | id for monitor datadog_free_disk_space_too_low | | datadog_free_memory_id | id for monitor datadog_free_memory | -| datadog_host_unreachable_id | id for monitor datadog_host_unreachable | | datadog_load_too_high_id | id for monitor datadog_load_too_high | ## Related documentation diff --git a/system/generic/inputs.tf b/system/generic/inputs.tf index b62c0b5..d051b03 100644 --- a/system/generic/inputs.tf +++ b/system/generic/inputs.tf @@ -31,36 +31,6 @@ variable "filter_tags_custom" { # System generic specific -variable "unreachable_silenced" { - description = "Groups to mute for Host unreachable monitor" - type = "map" - default = {} -} - -variable "unreachable_enabled" { - description = "Flag to enable Host unreachable monitor" - type = "string" - default = "true" -} - -variable "unreachable_extra_tags" { - description = "Extra tags for Host unreachable monitor" - type = "list" - default = [] -} - -variable "unreachable_message" { - description = "Custom message for Host unreachable monitor" - type = "string" - default = "" -} - -variable "unreachable_no_data_timeframe" { - description = "Timeframe for Host unreachable monitor to alert on no data" - type = "string" - default = 20 -} - variable "cpu_high_silenced" { description = "Groups to mute for CPU high monitor" type = "map" diff --git a/system/generic/monitors-system.tf b/system/generic/monitors-system.tf index 481a68d..ac84c0f 100644 --- a/system/generic/monitors-system.tf +++ b/system/generic/monitors-system.tf @@ -1,32 +1,3 @@ -resource "datadog_monitor" "datadog_host_unreachable" { - count = "${var.unreachable_enabled ? 1 : 0}" - name = "[${var.environment}] Host unreachable" - message = "${coalesce(var.unreachable_message, var.message)}" - - query = "\"datadog.agent.up\".over${module.filter-tags.service_check}.last(6).count_by_status()" - - type = "service check" - - thresholds { - ok = 1 - warning = 1 - critical = 5 - } - - notify_no_data = true - no_data_timeframe = "${var.unreachable_no_data_timeframe}" - new_host_delay = "${var.new_host_delay}" - notify_audit = false - timeout_h = 0 - include_tags = true - locked = false - require_full_window = true - - silenced = "${var.unreachable_silenced}" - - tags = ["env:${var.environment}", "type:system", "provider:host", "resource:generic", "team:claranet", "created-by:terraform", "${var.unreachable_extra_tags}"] -} - resource "datadog_monitor" "datadog_cpu_too_high" { count = "${var.cpu_high_enabled ? 1 : 0}" name = "[${var.environment}] CPU usage {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" diff --git a/system/generic/outputs.tf b/system/generic/outputs.tf index b25c2bf..ef034f9 100644 --- a/system/generic/outputs.tf +++ b/system/generic/outputs.tf @@ -1,8 +1,3 @@ -output "datadog_host_unreachable_id" { - description = "id for monitor datadog_host_unreachable" - value = "${datadog_monitor.datadog_host_unreachable.*.id}" -} - output "datadog_cpu_too_high_id" { description = "id for monitor datadog_cpu_too_high" value = "${datadog_monitor.datadog_cpu_too_high.*.id}" diff --git a/system/unreachable/README.md b/system/unreachable/README.md new file mode 100644 index 0000000..71233bb --- /dev/null +++ b/system/unreachable/README.md @@ -0,0 +1,44 @@ +# SYSTEM UNREACHABLE DataDog monitors + +## How to use this module + +``` +module "datadog-monitors-system-unreachable" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//system/unreachable?ref={revision}" + + environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +## Purpose + +Creates DataDog monitors with the following checks: + +- Host unreachable + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| environment | Architecture Environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `15` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when an alert is triggered | string | - | yes | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | +| unreachable_enabled | Flag to enable Host unreachable monitor | string | `true` | no | +| unreachable_extra_tags | Extra tags for Host unreachable monitor | list | `` | no | +| unreachable_message | Custom message for Host unreachable monitor | string | `` | no | +| unreachable_no_data_timeframe | Timeframe for Host unreachable monitor to alert on no data | string | `20` | no | +| unreachable_silenced | Groups to mute for Host unreachable monitor | map | `` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| host_unreachable_id | id for monitor host_unreachable | + +## Related documentation + diff --git a/system/unreachable/inputs.tf b/system/unreachable/inputs.tf new file mode 100644 index 0000000..ca98b4e --- /dev/null +++ b/system/unreachable/inputs.tf @@ -0,0 +1,62 @@ +# Global Terraform +variable "environment" { + description = "Architecture Environment" + type = "string" +} + +# Global DataDog +variable "evaluation_delay" { + description = "Delay in seconds for the metric evaluation" + default = 15 +} + +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 +} + +variable "message" { + description = "Message sent when an alert is triggered" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +# Unreachable + +variable "unreachable_silenced" { + description = "Groups to mute for Host unreachable monitor" + type = "map" + default = {} +} + +variable "unreachable_enabled" { + description = "Flag to enable Host unreachable monitor" + type = "string" + default = "true" +} + +variable "unreachable_extra_tags" { + description = "Extra tags for Host unreachable monitor" + type = "list" + default = [] +} + +variable "unreachable_message" { + description = "Custom message for Host unreachable monitor" + type = "string" + default = "" +} + +variable "unreachable_no_data_timeframe" { + description = "Timeframe for Host unreachable monitor to alert on no data" + type = "string" + default = 20 +} diff --git a/system/unreachable/modules.tf b/system/unreachable/modules.tf new file mode 100644 index 0000000..1c03eb2 --- /dev/null +++ b/system/unreachable/modules.tf @@ -0,0 +1,8 @@ +module "filter-tags" { + source = "../../common/filter-tags" + + environment = "${var.environment}" + resource = "system" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} diff --git a/system/unreachable/monitors-unreachable.tf b/system/unreachable/monitors-unreachable.tf new file mode 100644 index 0000000..c032dfb --- /dev/null +++ b/system/unreachable/monitors-unreachable.tf @@ -0,0 +1,28 @@ +resource "datadog_monitor" "host_unreachable" { + count = "${var.unreachable_enabled ? 1 : 0}" + name = "[${var.environment}] Host unreachable" + message = "${coalesce(var.unreachable_message, var.message)}" + + query = "\"datadog.agent.up\".over${module.filter-tags.service_check}.last(6).count_by_status()" + + type = "service check" + + thresholds { + ok = 1 + warning = 1 + critical = 5 + } + + notify_no_data = true + no_data_timeframe = "${var.unreachable_no_data_timeframe}" + new_host_delay = "${var.new_host_delay}" + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + + silenced = "${var.unreachable_silenced}" + + tags = ["env:${var.environment}", "type:system", "provider:host", "resource:unreachable", "team:claranet", "created-by:terraform", "${var.unreachable_extra_tags}"] +} diff --git a/system/unreachable/outputs.tf b/system/unreachable/outputs.tf new file mode 100644 index 0000000..49f13cf --- /dev/null +++ b/system/unreachable/outputs.tf @@ -0,0 +1,4 @@ +output "host_unreachable_id" { + description = "id for monitor host_unreachable" + value = "${datadog_monitor.host_unreachable.*.id}" +}