Merge branch 'MON-489_Datadog_monitors_Azure_VM_Disk' into 'master'

MON-489 "Datadog monitors azure vm disk"

Closes MON-489

See merge request claranet/pt-monitoring/projects/datadog/terraform/monitors!90
This commit is contained in:
Quentin Manfroi 2019-08-06 16:47:09 +02:00
commit 8571ac04e9
4 changed files with 86 additions and 1 deletions

View File

@ -18,6 +18,7 @@ Creates DataDog monitors with the following checks:
- Virtual Machine CPU usage
- Virtual Machine credit CPU
- Virtual Machine disk space
- Virtual Machine is unreachable
- Virtual Machine RAM reserved
@ -39,6 +40,13 @@ Creates DataDog monitors with the following checks:
| cpu\_usage\_threshold\_warning | Virtual Machine CPU usage in percent (warning threshold) | string | `"80"` | no |
| cpu\_usage\_time\_aggregator | Monitor aggregator for Virtual Machine CPU [available values: min, max or avg] | string | `"min"` | no |
| cpu\_usage\_timeframe | Monitor timeframe for Virtual Machine CPU [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_15m"` | no |
| disk\_space\_enabled | Flag to enable Virtual Machine disk space monitor | string | `"true"` | no |
| disk\_space\_extra\_tags | Extra tags for Virtual Machine free disk space monitor | list(string) | `[]` | no |
| disk\_space\_message | Custom message for Virtual Machine free disk space monitor | string | `""` | no |
| disk\_space\_threshold\_critical | Virtual Machine used disk space in percent (critical threshold) | string | `"95"` | no |
| disk\_space\_threshold\_warning | Virtual Machine used disk space in percent (warning threshold) | string | `"90"` | no |
| disk\_space\_time\_aggregator | Monitor aggregator for Virtual Machine free disk space [available values: min, max or avg] | string | `"max"` | no |
| disk\_space\_timeframe | Monitor timeframe for Virtual Machine free disk space too low [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_5m"` | no |
| environment | Architecture environment | string | n/a | yes |
| evaluation\_delay | Delay in seconds for the metric evaluation | string | `"900"` | no |
| filter\_tags\_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `"*"` | no |
@ -66,6 +74,7 @@ Creates DataDog monitors with the following checks:
|------|-------------|
| virtualmachine\_cpu\_usage\_id | id for monitor virtualmachine_cpu_usage |
| virtualmachine\_credit\_cpu\_remaining\_too\_low\_id | id for monitor virtualmachine_credit_cpu_remaining_too_low |
| virtualmachine\_disk\_space\_id | id for monitor virtualmachine_disk_space |
| virtualmachine\_ram\_reserved\_id | id for monitor virtualmachine_ram_reserved |
| virtualmachine\_status\_id | id for monitor virtualmachine_status |

View File

@ -191,4 +191,42 @@ variable "ram_reserved_threshold_critical" {
default = 95
}
# Toggles creation of the Virtual Machine disk space monitor.
# The monitor resource is only created when this holds the string "true".
variable "disk_space_enabled" {
  description = "Flag to enable Virtual Machine disk space monitor"
  type        = string
  default     = "true"
}
# Aggregation function interpolated at the start of the disk space monitor
# query, in front of the timeframe.
variable "disk_space_time_aggregator" {
  type        = string
  default     = "max"
  description = "Monitor aggregator for Virtual Machine free disk space [available values: min, max or avg]"
}
# Evaluation window interpolated into the disk space monitor query,
# inside the parentheses that follow the time aggregator.
variable "disk_space_timeframe" {
  type        = string
  default     = "last_5m"
  description = "Monitor timeframe for Virtual Machine free disk space too low [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
# Critical threshold of the disk space monitor.
# The monitor query compares the percentage of *used* filesystem space against
# this value with ">", so a higher number means a fuller disk.
variable "disk_space_threshold_critical" {
  description = "Virtual Machine used disk space in percent (critical threshold)"
  type        = string
  default     = "95"
}
# Warning threshold of the disk space monitor.
# Expressed as a percentage of *used* filesystem space, like the critical
# threshold it accompanies in the monitor's thresholds map.
variable "disk_space_threshold_warning" {
  description = "Virtual Machine used disk space in percent (warning threshold)"
  type        = string
  default     = "90"
}
# Additional tags appended to the fixed tag list of the disk space monitor.
variable "disk_space_extra_tags" {
  type        = list(string)
  default     = []
  description = "Extra tags for Virtual Machine free disk space monitor"
}
# Notification message specific to the disk space monitor.
# When left empty, the monitor falls back to var.message (the resource picks
# the first non-empty value with coalesce).
variable "disk_space_message" {
  description = "Custom message for Virtual Machine free disk space monitor"
  type        = string
  default     = ""
}

View File

@ -105,7 +105,7 @@ resource "datadog_monitor" "virtualmachine_ram_reserved" {
type = "query alert"
query = <<EOQ
${var.ram_reserved_time_aggregator}(${var.ram_reserved_timeframe}):
${var.ram_reserved_time_aggregator}(${var.ram_reserved_timeframe}):
avg:azure.vm.memory_committed_bytes${module.filter-tags.query_alert} by {resource_group,region,name} / (
avg:azure.vm.memory_committed_bytes${module.filter-tags.query_alert} by {resource_group,region,name} +
avg:azure.vm.memory_available_bytes${module.filter-tags.query_alert} by {resource_group,region,name}) * 100
@ -134,3 +134,36 @@ EOQ
}
}
# Datadog query alert on Azure Virtual Machine disk usage.
# Triggers when the aggregated azure.vm.builtin_filesystem_percentusedspace
# metric, grouped by resource_group/region/name, exceeds the critical threshold.
resource "datadog_monitor" "virtualmachine_disk_space" {
  # Created only when the feature flag is the string "true".
  count = var.disk_space_enabled == "true" ? 1 : 0
  name  = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Virtual Machine disk space {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
  # Monitor-specific message wins; otherwise fall back to the module-wide one.
  message = coalesce(var.disk_space_message, var.message)
  type    = "query alert"

  # The value compared in the query is the critical threshold; the warning
  # level is supplied through the thresholds map below.
  query = <<EOQ
${var.disk_space_time_aggregator}(${var.disk_space_timeframe}):
avg:azure.vm.builtin_filesystem_percentusedspace${module.filter-tags.query_alert} by {resource_group,region,name}
> ${var.disk_space_threshold_critical}
EOQ

  thresholds = {
    warning  = var.disk_space_threshold_warning
    critical = var.disk_space_threshold_critical
  }

  evaluation_delay    = var.evaluation_delay
  new_host_delay      = var.new_host_delay
  notify_no_data      = false
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 1
  include_tags        = true
  locked              = false
  require_full_window = false

  tags = concat(["env:${var.environment}", "type:cloud", "provider:azure", "resource:virtualmachine", "team:claranet", "created-by:terraform"], var.disk_space_extra_tags)

  lifecycle {
    # Unquoted attribute reference: the quoted form is deprecated in
    # Terraform 0.12+. Changes to "silenced" are not planned as a diff
    # (presumably so mutes set outside Terraform are kept - confirm).
    ignore_changes = [silenced]
  }
}

View File

@ -18,3 +18,8 @@ output "virtualmachine_ram_reserved_id" {
value = datadog_monitor.virtualmachine_ram_reserved.*.id
}
# Exposes the id(s) of the disk space monitor as a list; the list is empty
# when the monitor's count evaluates to 0.
output "virtualmachine_disk_space_id" {
  value       = datadog_monitor.virtualmachine_disk_space[*].id
  description = "id for monitor virtualmachine_disk_space"
}