MON-191 - Generic system monitors updated with customizable aggregator
This commit is contained in:
parent
5bd3cab9ba
commit
4d1e840b7f
@ -11,7 +11,7 @@ resource "datadog_monitor" "status" {
|
||||
message = "${coalesce(var.status_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
avg(${var.status_timeframe}):avg:azure.cache_redis.status{${data.template_file.filter.rendered}} by {resource_group,region,name} != 1
|
||||
${var.status_aggregator}(${var.status_timeframe}):avg:azure.cache_redis.status{${data.template_file.filter.rendered}} by {resource_group,region,name} != 1
|
||||
EOF
|
||||
|
||||
type = "metric alert"
|
||||
@ -36,8 +36,8 @@ resource "datadog_monitor" "evictedkeys" {
|
||||
message = "${coalesce(var.evictedkeys_limit_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
avg(${var.evictedkeys_limit_timeframe}): (
|
||||
avg:azure.cache_redis.evictedkeys{${data.template_file.filter.rendered}} by {resource_group,region,name}
|
||||
${var.evictedkeys_limit_aggregator}(${var.evictedkeys_limit_timeframe}): (
|
||||
${var.evictedkeys_limit_aggregator}:azure.cache_redis.evictedkeys{${data.template_file.filter.rendered}} by {resource_group,region,name}
|
||||
) > ${var.evictedkeys_limit_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -68,8 +68,8 @@ resource "datadog_monitor" "percent_processor_time" {
|
||||
message = "${coalesce(var.percent_processor_time_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
avg(${var.percent_processor_time_timeframe}): (
|
||||
avg:azure.cache_redis.percent_processor_time{${data.template_file.filter.rendered}} by {resource_group,region,name}
|
||||
${var.percent_processor_time_aggregator}(${var.percent_processor_time_timeframe}): (
|
||||
${var.percent_processor_time_aggregator}:azure.cache_redis.percent_processor_time{${data.template_file.filter.rendered}} by {resource_group,region,name}
|
||||
) > ${var.percent_processor_time_threshold_critical}
|
||||
EOF
|
||||
|
||||
|
||||
@ -28,11 +28,13 @@ Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| cpu_high_aggregator | Monitor aggregator for CPU high [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| cpu_high_message | Custom message for CPU high monitor | string | `` | no |
|
||||
| cpu_high_silenced | Groups to mute for CPU high monitor | map | `<map>` | no |
|
||||
| cpu_high_threshold_critical | CPU high critical threshold | string | `95` | no |
|
||||
| cpu_high_threshold_warning | CPU high warning threshold | string | `80` | no |
|
||||
| cpu_high_timeframe | Monitor timeframe for CPU high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| cpu_load_aggregator | Monitor aggregator for CPU load ratio [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| cpu_load_message | Custom message for CPU load ratio monitor | string | `` | no |
|
||||
| cpu_load_silenced | Groups to mute for CPU load ratio monitor | map | `<map>` | no |
|
||||
| cpu_load_threshold_critical | CPU load ratio critical threshold | string | `4` | no |
|
||||
@ -42,16 +44,19 @@ Inputs
|
||||
| environment | Architecture Environment | string | - | yes |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| free_disk_inodes_aggregator | Monitor aggregator for Free disk inodes [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| free_disk_inodes_message | Custom message for Free disk inodes monitor | string | `` | no |
|
||||
| free_disk_inodes_silenced | Groups to mute for Free disk inodes monitor | map | `<map>` | no |
|
||||
| free_disk_inodes_threshold_critical | Free disk inodes critical threshold | string | `5` | no |
|
||||
| free_disk_inodes_threshold_warning | Free disk inodes warning threshold | string | `10` | no |
|
||||
| free_disk_inodes_timeframe | Monitor timeframe for Free disk inodes [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| free_disk_space_aggregator | Monitor aggregator for Free diskspace [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| free_disk_space_message | Custom message for Free diskspace monitor | string | `` | no |
|
||||
| free_disk_space_silenced | Groups to mute for Free diskspace monitor | map | `<map>` | no |
|
||||
| free_disk_space_threshold_critical | Free disk space critical threshold | string | `5` | no |
|
||||
| free_disk_space_threshold_warning | Free disk space warning threshold | string | `10` | no |
|
||||
| free_disk_space_timeframe | Monitor timeframe for Free diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| free_memory_aggregator | Monitor aggregator for Free memory [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| free_memory_message | Custom message for Free memory monitor | string | - | yes |
|
||||
| free_memory_silenced | Groups to mute for Free memory monitor | map | `<map>` | no |
|
||||
| free_memory_threshold_critical | Free memory critical threshold | string | `5` | no |
|
||||
|
||||
@ -38,6 +38,12 @@ variable "cpu_high_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cpu_high_aggregator" {
|
||||
description = "Monitor aggregator for CPU high [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "cpu_high_timeframe" {
|
||||
description = "Monitor timeframe for CPU high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -66,6 +72,12 @@ variable "cpu_load_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cpu_load_aggregator" {
|
||||
description = "Monitor aggregator for CPU load ratio [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "cpu_load_timeframe" {
|
||||
description = "Monitor timeframe for CPU load ratio [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -94,6 +106,12 @@ variable "free_disk_space_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "free_disk_space_aggregator" {
|
||||
description = "Monitor aggregator for Free diskspace [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "free_disk_space_timeframe" {
|
||||
description = "Monitor timeframe for Free diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -122,6 +140,12 @@ variable "free_disk_inodes_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "free_disk_inodes_aggregator" {
|
||||
description = "Monitor aggregator for Free disk inodes [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "free_disk_inodes_timeframe" {
|
||||
description = "Monitor timeframe for Free disk inodes [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -149,6 +173,12 @@ variable "free_memory_message" {
|
||||
type = "string"
|
||||
}
|
||||
|
||||
variable "free_memory_aggregator" {
|
||||
description = "Monitor aggregator for Free memory [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "free_memory_timeframe" {
|
||||
description = "Monitor timeframe for Free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
|
||||
@ -11,7 +11,7 @@ resource "datadog_monitor" "datadog_cpu_too_high" {
|
||||
message = "${coalesce(var.cpu_high_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.cpu_high_timeframe}): (
|
||||
${var.cpu_high_aggregator}(${var.cpu_high_timeframe}): (
|
||||
100 - avg:system.cpu.idle{${data.template_file.filter.rendered}} by {region,host}
|
||||
) > ${var.cpu_high_threshold_critical}
|
||||
EOF
|
||||
@ -42,9 +42,9 @@ resource "datadog_monitor" "datadog_load_too_high" {
|
||||
message = "${coalesce(var.cpu_load_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.cpu_load_timeframe}): (
|
||||
avg:system.load.5{${data.template_file.filter.rendered}} by {region,host} /
|
||||
avg:system.core.count{${data.template_file.filter.rendered}} by {region,host}
|
||||
${var.cpu_load_aggregator}(${var.cpu_load_timeframe}): (
|
||||
${var.cpu_load_aggregator}:system.load.5{${data.template_file.filter.rendered}} by {region,host} /
|
||||
${var.cpu_load_aggregator}:system.core.count{${data.template_file.filter.rendered}} by {region,host}
|
||||
) > ${var.cpu_load_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -74,9 +74,9 @@ resource "datadog_monitor" "datadog_free_disk_space_too_low" {
|
||||
message = "${coalesce(var.free_disk_space_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.free_disk_space_timeframe}): (
|
||||
avg:system.disk.free{${data.template_file.filter.rendered},dd_disk:enabled} by {region,host,device} /
|
||||
avg:system.disk.total{${data.template_file.filter.rendered},dd_disk:enabled} by {region,host,device} * 100
|
||||
${var.free_disk_space_aggregator}(${var.free_disk_space_timeframe}): (
|
||||
${var.free_disk_space_aggregator}:system.disk.free{${data.template_file.filter.rendered},dd_disk:enabled} by {region,host,device} /
|
||||
${var.free_disk_space_aggregator}:system.disk.total{${data.template_file.filter.rendered},dd_disk:enabled} by {region,host,device} * 100
|
||||
) < ${var.free_disk_space_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -106,9 +106,9 @@ resource "datadog_monitor" "datadog_free_disk_space_inodes_too_low" {
|
||||
message = "${coalesce(var.free_disk_inodes_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.free_disk_inodes_timeframe}): (
|
||||
avg:system.fs.inodes.free{${data.template_file.filter.rendered},dd_disk:enabled} by {region,host,device} /
|
||||
avg:system.fs.inodes.total{${data.template_file.filter.rendered},dd_disk:enabled} by {region,host,device} * 100
|
||||
${var.free_disk_inodes_aggregator}(${var.free_disk_inodes_timeframe}): (
|
||||
${var.free_disk_inodes_aggregator}:system.fs.inodes.free{${data.template_file.filter.rendered},dd_disk:enabled} by {region,host,device} /
|
||||
${var.free_disk_inodes_aggregator}:system.fs.inodes.total{${data.template_file.filter.rendered},dd_disk:enabled} by {region,host,device} * 100
|
||||
) < ${var.free_disk_inodes_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -138,9 +138,9 @@ resource "datadog_monitor" "datadog_free_memory" {
|
||||
message = "${var.free_memory_message}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.free_memory_timeframe}): (
|
||||
avg:system.mem.free{${data.template_file.filter.rendered}} by {region,host} /
|
||||
avg:system.mem.total{${data.template_file.filter.rendered}} by {region,host} * 100
|
||||
${var.free_memory_aggregator}(${var.free_memory_timeframe}): (
|
||||
${var.free_memory_aggregator}:system.mem.free{${data.template_file.filter.rendered}} by {region,host} /
|
||||
${var.free_memory_aggregator}:system.mem.total{${data.template_file.filter.rendered}} by {region,host} * 100
|
||||
) < ${var.free_memory_threshold_critical}
|
||||
EOF
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user