MON-96 - Updated databases, middleware and system monitors with new best practice

This commit is contained in:
Alexandre Gaillet 2018-03-20 14:16:23 +01:00 committed by Quentin Manfroi
parent bb88248053
commit 9cc55717e1
15 changed files with 152 additions and 10 deletions

View File

@ -79,3 +79,5 @@ Inputs
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| message | Message sent when an alert is triggered | string | - | yes |
| mongodb_replicaset_message | Custom message for Mongodb replicaset monitor | string | `` | no |
| mongodb_replicaset_silenced | Groups to mute for Mongodb replicaset monitor | map | `<map>` | no |

View File

@ -23,3 +23,15 @@ variable "filter_tags_custom" {
description = "Tags used for custom filtering when filter_tags_use_defaults is false"
default = "*"
}
# Mute settings for the MongoDB replica set monitor. The value is handed
# unchanged to the `silenced` argument of datadog_monitor.mongodb_replicaset_state.
# Defaults to an empty map.
variable "mongodb_replicaset_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Mongodb replicaset monitor"
}
# Optional message override for the MongoDB replica set monitor. The monitor
# uses coalesce(var.mongodb_replicaset_message, var.message), so leaving this
# at its empty default falls back to the module-wide `message`.
variable "mongodb_replicaset_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Mongodb replicaset monitor"
}

View File

@ -8,7 +8,7 @@ data "template_file" "filter" {
resource "datadog_monitor" "mongodb_replicaset_state" {
name = "[${var.environment}] Member down in the replica set"
message = "${var.message}"
message = "${coalesce(var.mongodb_replicaset_message, var.message)}"
query = <<EOF
avg(last_5m): (
@ -27,5 +27,7 @@ resource "datadog_monitor" "mongodb_replicaset_state" {
include_tags = true
require_full_window = true
silenced = "${var.mongodb_replicaset_silenced}"
tags = ["env:${var.environment}", "resource:mongodb"]
}

View File

@ -24,6 +24,8 @@ Inputs
| Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:|
| apache_process_message | Custom message for Apache process monitor | string | `` | no |
| apache_process_silenced | Groups to mute for Apache process monitor | map | `<map>` | no |
| environment | Architecture Environment | string | - | yes |
| evaluation_delay | Delay in seconds for the metric evaluation | string | `15` | no |
| message | Message sent when an alert is triggered | string | - | yes |

View File

@ -16,3 +16,14 @@ variable "message" {
# Apache Middleware specific
# Mute settings for the Apache process monitor. The value is handed unchanged
# to the `silenced` argument of datadog_monitor.datadog_apache_process.
# Defaults to an empty map.
variable "apache_process_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Apache process monitor"
}
# Optional message override for the Apache process monitor. The monitor uses
# coalesce(var.apache_process_message, var.message), so leaving this at its
# empty default falls back to the module-wide `message`.
variable "apache_process_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Apache process monitor"
}

View File

@ -1,6 +1,6 @@
resource "datadog_monitor" "datadog_apache_process" {
name = "[${var.environment}] Can't connect to apache vhost status"
message = "${var.message}"
message = "${coalesce(var.apache_process_message, var.message)}"
type = "service check"
query = "\"apache.can_connect\".over(\"dd_monitoring:enabled\",\"dd_apache:enabled\",\"env:${var.environment}\").by(\"host\",\"port\").last(6).count_by_status()"
@ -22,5 +22,7 @@ resource "datadog_monitor" "datadog_apache_process" {
require_full_window = true
no_data_timeframe = 20
silenced = "${var.apache_process_silenced}"
tags = ["env:${var.environment}", "resource:apache"]
}

View File

@ -27,3 +27,5 @@ Inputs
| environment | Architecture Environment | string | - | yes |
| evaluation_delay | Delay in seconds for the metric evaluation | string | `15` | no |
| message | Message sent when an alert is triggered | string | - | yes |
| nginx_process_message | Custom message for Nginx process monitor | string | `` | no |
| nginx_process_silenced | Groups to mute for Nginx process monitor | map | `<map>` | no |

View File

@ -16,3 +16,14 @@ variable "message" {
# Nginx Middleware specific
# Mute settings for the Nginx process monitor. The value is handed unchanged
# to the `silenced` argument of datadog_monitor.datadog_nginx_process.
# Defaults to an empty map.
variable "nginx_process_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Nginx process monitor"
}
# Optional message override for the Nginx process monitor. The monitor uses
# coalesce(var.nginx_process_message, var.message), so leaving this at its
# empty default falls back to the module-wide `message`.
variable "nginx_process_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Nginx process monitor"
}

View File

@ -1,6 +1,6 @@
resource "datadog_monitor" "datadog_nginx_process" {
name = "[${var.environment}] Can't connect to nginx vhost status"
message = "${var.message}"
message = "${coalesce(var.nginx_process_message, var.message)}"
type = "service check"
query = "\"nginx.can_connect\".over(\"dd_monitoring:enabled\",\"dd_nginx:enabled\",\"env:${var.environment}\").by(\"host\",\"port\").last(6).count_by_status()"
@ -22,5 +22,7 @@ resource "datadog_monitor" "datadog_nginx_process" {
require_full_window = true
no_data_timeframe = 20
silenced = "${var.nginx_process_silenced}"
tags = ["env:${var.environment}", "resource:nginx"]
}

View File

@ -31,5 +31,9 @@ Inputs
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| message | Message sent when an alert is triggered | string | - | yes |
| php_fpm_busy_message | Custom message for PHP FPM busy worker monitor | string | `` | no |
| php_fpm_busy_silenced | Groups to mute for PHP FPM busy worker monitor | map | `<map>` | no |
| php_fpm_busy_threshold_critical | php fpm busy critical threshold | string | `0.9` | no |
| php_fpm_busy_threshold_warning | php fpm busy warning threshold | string | `0.8` | no |
| php_fpm_process_message | Custom message for PHP FPM process monitor | string | `` | no |
| php_fpm_process_silenced | Groups to mute for PHP FPM process monitor | map | `<map>` | no |

View File

@ -31,6 +31,18 @@ variable "filter_tags_custom" {
# PHP FPM Middleware specific
# Mute settings for the PHP FPM busy worker monitor. The value is handed
# unchanged to the `silenced` argument of
# datadog_monitor.datadog_php_fpm_process_idle. Defaults to an empty map.
variable "php_fpm_busy_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for PHP FPM busy worker monitor"
}
# Optional message override for the PHP FPM busy worker monitor. The monitor
# uses coalesce(var.php_fpm_busy_message, var.message), so leaving this at its
# empty default falls back to the module-wide `message`.
variable "php_fpm_busy_message" {
  type        = "string"
  default     = ""
  description = "Custom message for PHP FPM busy worker monitor"
}
variable "php_fpm_busy_threshold_warning" {
description = "php fpm busy warning threshold"
default = 0.8
@ -40,3 +52,15 @@ variable "php_fpm_busy_threshold_critical" {
description = "php fpm busy critical threshold"
default = 0.9
}
# Mute settings for the PHP FPM process (connectivity) monitor. The value is
# handed unchanged to the `silenced` argument of
# datadog_monitor.datadog_fpm_process. Defaults to an empty map.
variable "php_fpm_process_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for PHP FPM process monitor"
}
# Optional message override for the PHP FPM process monitor. The monitor uses
# coalesce(var.php_fpm_process_message, var.message), so leaving this at its
# empty default falls back to the module-wide `message`.
variable "php_fpm_process_message" {
  type        = "string"
  default     = ""
  description = "Custom message for PHP FPM process monitor"
}

View File

@ -8,7 +8,7 @@ data "template_file" "filter" {
resource "datadog_monitor" "datadog_php_fpm_process_idle" {
name = "[${var.environment}] php_fpm busy worker {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
message = "${var.message}"
message = "${coalesce(var.php_fpm_busy_message, var.message)}"
type = "metric alert"
@ -35,12 +35,14 @@ resource "datadog_monitor" "datadog_php_fpm_process_idle" {
require_full_window = true
no_data_timeframe = 20
silenced = "${var.php_fpm_busy_silenced}"
tags = ["env:${var.environment}", "resource:php-fpm"]
}
resource "datadog_monitor" "datadog_fpm_process" {
name = "[${var.environment}] Can't connect to php-fpm"
message = "${var.message}"
message = "${coalesce(var.php_fpm_process_message, var.message)}"
type = "service check"
query = "\"php_fpm.can_ping\".over(\"dd_monitoring:enabled\",\"dd_php_fpm:enabled\",\"env:${var.environment}\").by(\"host\",\"port\").last(6).count_by_status()"
@ -62,5 +64,7 @@ resource "datadog_monitor" "datadog_fpm_process" {
require_full_window = true
no_data_timeframe = 20
silenced = "${var.php_fpm_process_silenced}"
tags = ["env:${var.environment}", "resource:php-fpm"]
}

View File

@ -24,6 +24,8 @@ Inputs
| Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:|
| cpu_high_message | Custom message for CPU high monitor | string | `` | no |
| cpu_high_silenced | Groups to mute for CPU high monitor | map | `<map>` | no |
| cpu_high_threshold_critical | CPU high critical threshold | string | `95` | no |
| cpu_high_threshold_warning | CPU high warning threshold | string | `80` | no |
| cpu_high_timeframe | CPU high timeframe | string | `last_5m` | no |
@ -31,10 +33,16 @@ Inputs
| evaluation_delay | Delay in seconds for the metric evaluation | string | `600` | no |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| free_disk_inodes_message | Custom message for Free disk inodes monitor | string | `` | no |
| free_disk_inodes_silenced | Groups to mute for Free disk inodes monitor | map | `<map>` | no |
| free_disk_inodes_threshold_critical | Free disk inodes critical threshold | string | `5` | no |
| free_disk_inodes_threshold_warning | Free disk inodes warning threshold | string | `10` | no |
| free_disk_space_message | Custom message for Free diskspace monitor | string | `` | no |
| free_disk_space_silenced | Groups to mute for Free diskspace monitor | map | `<map>` | no |
| free_disk_space_threshold_critical | Free disk space critical threshold | string | `5` | no |
| free_disk_space_threshold_warning | Free disk space warning threshold | string | `10` | no |
| free_memory_message | Custom message for Free memory monitor | string | `` | no |
| free_memory_silenced | Groups to mute for Free memory monitor | map | `<map>` | no |
| free_memory_threshold_critical | Free memory critical threshold | string | `5` | no |
| free_memory_threshold_warning | Free memory warning threshold | string | `10` | no |
| message | Message sent when an alert is triggered | string | - | yes |

View File

@ -26,6 +26,18 @@ variable "filter_tags_custom" {
# Custom CPU instance specific
# Mute settings for the CPU high monitor. The value is handed unchanged to the
# `silenced` argument of datadog_monitor.datadog_cpu_too_high.
# Defaults to an empty map.
variable "cpu_high_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for CPU high monitor"
}
# Optional message override for the CPU high monitor. The monitor uses
# coalesce(var.cpu_high_message, var.message), so leaving this at its empty
# default falls back to the module-wide `message`.
variable "cpu_high_message" {
  type        = "string"
  default     = ""
  description = "Custom message for CPU high monitor"
}
variable "cpu_high_timeframe" {
description = "CPU high timeframe"
default = "last_5m"
@ -41,6 +53,18 @@ variable "cpu_high_threshold_critical" {
default = 95
}
# Mute settings for the free disk space monitor. The value is handed unchanged
# to the `silenced` argument of datadog_monitor.datadog_free_disk_space_too_low.
# Defaults to an empty map.
variable "free_disk_space_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Free diskspace monitor"
}
# Optional message override for the free disk space monitor. The monitor uses
# coalesce(var.free_disk_space_message, var.message), so leaving this at its
# empty default falls back to the module-wide `message`.
variable "free_disk_space_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Free diskspace monitor"
}
variable "free_disk_space_threshold_warning" {
description = "Free disk space warning threshold"
default = 10
@ -51,6 +75,18 @@ variable "free_disk_space_threshold_critical" {
default = 5
}
# Mute settings for the free disk inodes monitor. The value is handed unchanged
# to the `silenced` argument of
# datadog_monitor.datadog_free_disk_space_inodes_too_low. Defaults to an empty map.
variable "free_disk_inodes_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Free disk inodes monitor"
}
# Optional message override for the free disk inodes monitor. The monitor uses
# coalesce(var.free_disk_inodes_message, var.message), so leaving this at its
# empty default falls back to the module-wide `message`.
variable "free_disk_inodes_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Free disk inodes monitor"
}
variable "free_disk_inodes_threshold_warning" {
description = "Free disk space warning threshold"
default = 10
@ -61,6 +97,18 @@ variable "free_disk_inodes_threshold_critical" {
default = 5
}
# Mute settings for the free memory monitor. The value is handed unchanged to
# the `silenced` argument of datadog_monitor.datadog_free_memory.
# Defaults to an empty map.
variable "free_memory_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Free memory monitor"
}
# Optional message override for the free memory monitor. The monitor uses
# coalesce(var.free_memory_message, var.message), so leaving this at its empty
# default falls back to the module-wide `message`.
variable "free_memory_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Free memory monitor"
}
variable "free_memory_threshold_warning" {
description = "Free disk space warning threshold"
default = 10

View File

@ -8,7 +8,7 @@ data "template_file" "filter" {
resource "datadog_monitor" "datadog_cpu_too_high" {
name = "[${var.environment}] CPU usage {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
message = "${var.message}"
message = "${coalesce(var.cpu_high_message, var.message)}"
query = <<EOF
min(${var.cpu_high_timeframe}): (
@ -34,11 +34,13 @@ resource "datadog_monitor" "datadog_cpu_too_high" {
locked = false
require_full_window = true
no_data_timeframe = 20
silenced = "${var.cpu_high_silenced}"
}
resource "datadog_monitor" "datadog_free_disk_space_too_low" {
name = "[${var.environment}] Free disk space {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
message = "${var.message}"
message = "${coalesce(var.free_disk_space_message, var.message)}"
query = <<EOF
min(last_5m): (
@ -65,11 +67,13 @@ resource "datadog_monitor" "datadog_free_disk_space_too_low" {
locked = false
require_full_window = true
no_data_timeframe = 20
silenced = "${var.free_disk_space_silenced}"
}
resource "datadog_monitor" "datadog_free_disk_space_inodes_too_low" {
name = "[${var.environment}] Free disk inodes {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
message = "${var.message}"
message = "${coalesce(var.free_disk_inodes_message, var.message)}"
query = <<EOF
min(last_5m): (
@ -96,11 +100,13 @@ resource "datadog_monitor" "datadog_free_disk_space_inodes_too_low" {
locked = false
require_full_window = true
no_data_timeframe = 20
silenced = "${var.free_disk_inodes_silenced}"
}
resource "datadog_monitor" "datadog_free_memory" {
name = "[${var.environment}] Free memory {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
message = "${var.message}"
message = "${coalesce(var.free_memory_message, var.message)}"
query = <<EOF
min(last_1m): (
@ -128,4 +134,6 @@ resource "datadog_monitor" "datadog_free_memory" {
locked = false
require_full_window = true
no_data_timeframe = 20
silenced = "${var.free_memory_silenced}"
}