MON-27 change monitors
This commit is contained in:
parent
e7b53ccee4
commit
a1f78f071c
21
inputs.tf
21
inputs.tf
@ -81,12 +81,27 @@ variable "elb_backend_latency" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
##apache nginx php
|
##apache nginx php
|
||||||
variable "dd_apache_basics" {
|
variable "dd_nginx" {
|
||||||
default = "disabled"
|
default = "disabled"
|
||||||
}
|
}
|
||||||
variable "dd_nginx_basics" {
|
variable "dd_php_fpm" {
|
||||||
default = "disabled"
|
default = "disabled"
|
||||||
}
|
}
|
||||||
variable "dd_php_basics" {
|
|
||||||
|
variable "dd_apache" {
|
||||||
default = "disabled"
|
default = "disabled"
|
||||||
}
|
}
|
||||||
|
# Settings shared by the Apache, Nginx and PHP-FPM Datadog monitors in this module.
#   notify_no_data - passed to each monitor's `notify_no_data` argument.
#   delay          - passed to both `evaluation_delay` and `new_host_delay` of each monitor
#                    (presumably seconds, per the Datadog provider - TODO confirm).
variable "apache_nginx_fpm_config" {
|
||||||
|
type = "map"
|
||||||
|
default = {
|
||||||
|
notify_no_data = false
|
||||||
|
delay = 900
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# Warning / critical thresholds for the "php-fpm_process_idle" monitor.
# The monitor's query computes active / (idle + active) php_fpm processes, so these
# values are ratios of busy workers (0.8 = 80% busy), not percentages.
variable "php_fpm_busy_threshold" {
|
||||||
|
type = "map"
|
||||||
|
default = {
|
||||||
|
warning = 0.8
|
||||||
|
|
||||||
|
critical = 0.9
|
||||||
|
}
|
||||||
|
}
|
||||||
26
monitors_apache.tf
Normal file
26
monitors_apache.tf
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# Service-check monitor on the `process.up` check for the Apache process.
# Created only when var.dd_apache is "enabled"; scoped to hosts tagged
# dd_monitoring:enabled, dd_apache:enabled, process:apache and env:<var.env>,
# and grouped by host / process / app.
resource "datadog_monitor" "Apache_process" {
  # Fixed: the name previously read "Nginx process is down" (copy-paste from the
  # Nginx monitor) although this monitor watches process:apache.
  name    = "[${var.env}] Apache process is down on {{host.name}}"
  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"

  type  = "service check"
  query = "\"process.up\".over(\"dd_monitoring:enabled\",\"dd_apache:enabled\",\"process:apache\",\"env:${var.env}\").by(\"host\",\"process\", \"app\").last(4).count_by_status()"

  # Feature toggle: the monitor exists only when dd_apache is "enabled".
  count = "${var.dd_apache == "enabled" ? 1 : 0 }"

  # Status counts evaluated against the last 4 check runs (see `.last(4)` above).
  thresholds = {
    ok       = 1
    warning  = 2
    critical = 4
  }

  # No-data behaviour and delays come from the shared apache/nginx/fpm config map.
  notify_no_data      = "${var.apache_nginx_fpm_config["notify_no_data"]}"
  evaluation_delay    = "${var.apache_nginx_fpm_config["delay"]}"
  new_host_delay      = "${var.apache_nginx_fpm_config["delay"]}"
  renotify_interval   = 60
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  no_data_timeframe   = 20
}
|
||||||
54
monitors_fpm.tf
Normal file
54
monitors_fpm.tf
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
# Query-alert monitor on the ratio of busy php-fpm workers:
#   active / (idle + active), averaged over the last 10 minutes,
# per host / region / app. Created only when var.dd_php_fpm is "enabled".
resource "datadog_monitor" "php-fpm_process_idle" {
  # NOTE(review): the "> 90%" in the name is static text; it matches the default
  # critical threshold (0.9) but will not follow an overridden variable.
  name    = "[${var.env}] php_fpm busy worker > 90% on {{host.name}}"
  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"

  type = "query alert"

  # Fixed: all three series are now grouped by the same tags ({host,region,app});
  # the last term previously used {host,region,stack}, so the groups of the
  # numerator and denominator did not line up.
  # Fixed: the comparison value is taken from the critical threshold variable
  # instead of a hard-coded 0.90, so the query and `thresholds.critical` cannot
  # diverge when the variable is overridden.
  query = "avg(last_10m):avg:php_fpm.processes.active{dd_monitoring:enabled,dd_php_fpm:enabled,env:${var.env}} by {host,region,app} / ( avg:php_fpm.processes.idle{dd_monitoring:enabled,dd_php_fpm:enabled,env:${var.env}} by {host,region,app} + avg:php_fpm.processes.active{dd_monitoring:enabled,dd_php_fpm:enabled,env:${var.env}} by {host,region,app} ) > ${var.php_fpm_busy_threshold["critical"]}"

  # Feature toggle: the monitor exists only when dd_php_fpm is "enabled".
  count = "${var.dd_php_fpm == "enabled" ? 1 : 0 }"

  thresholds {
    warning  = "${var.php_fpm_busy_threshold["warning"]}"
    critical = "${var.php_fpm_busy_threshold["critical"]}"
  }

  # No-data behaviour and delays come from the shared apache/nginx/fpm config map.
  notify_no_data   = "${var.apache_nginx_fpm_config["notify_no_data"]}"
  evaluation_delay = "${var.apache_nginx_fpm_config["delay"]}"
  new_host_delay   = "${var.apache_nginx_fpm_config["delay"]}"

  # Fixed: `renotify_interval` was declared twice (60 and 0). Only the value 60,
  # which the sibling process monitors also use, is kept.
  renotify_interval   = 60
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  no_data_timeframe   = 20
}
|
||||||
|
|
||||||
|
|
||||||
|
# Service-check monitor on the `process.up` check for the PHP-FPM process.
# Scoped to hosts tagged dd_monitoring:enabled, dd_php_fpm:enabled,
# process:php_fpm and env:<var.env>, grouped by host / process / app.
resource "datadog_monitor" "FPM_process" {
  name    = "[${var.env}] FPM process is down on {{host.name}}"
  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"

  type  = "service check"
  query = "\"process.up\".over(\"dd_monitoring:enabled\",\"dd_php_fpm:enabled\",\"process:php_fpm\",\"env:${var.env}\").by(\"host\",\"process\", \"app\").last(4).count_by_status()"

  # Fixed: the toggle previously tested var.dd_nginx, so this PHP-FPM monitor was
  # created or skipped based on the Nginx switch. It now follows var.dd_php_fpm,
  # matching the dd_php_fpm:enabled tag used in the query.
  count = "${var.dd_php_fpm == "enabled" ? 1 : 0 }"

  # Status counts evaluated against the last 4 check runs (see `.last(4)` above).
  thresholds = {
    ok       = 1
    warning  = 2
    critical = 4
  }

  # No-data behaviour and delays come from the shared apache/nginx/fpm config map.
  notify_no_data      = "${var.apache_nginx_fpm_config["notify_no_data"]}"
  evaluation_delay    = "${var.apache_nginx_fpm_config["delay"]}"
  new_host_delay      = "${var.apache_nginx_fpm_config["delay"]}"
  renotify_interval   = 60
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  no_data_timeframe   = 20
}
|
||||||
@ -1,11 +1,11 @@
|
|||||||
resource "datadog_monitor" "Nginx_process" {
|
resource "datadog_monitor" "Nginx_process" {
|
||||||
name = "Nginx process is down on {{host.name}}"
|
name = "[${var.env}] Nginx process is down on {{host.name}}"
|
||||||
message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
|
message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
|
||||||
|
|
||||||
|
|
||||||
type = "service check"
|
type = "service check"
|
||||||
query = "process.up.over(dd_monitoring:enabled,process:nginx).exclude(dd_custom_nginx:enabled).last(4).count_by_status()"
|
query = "\"process.up\".over(\"dd_monitoring:enabled\",\"dd_nginx:enabled\",\"process:nginx\",\"env:${var.env}\").by(\"host\",\"process\", \"app\").last(4).count_by_status()"
|
||||||
count = "${var.dd_nginx_basics == "true" ? 1 : 0 }"
|
count = "${var.dd_nginx == "enabled" ? 1 : 0 }"
|
||||||
|
|
||||||
thresholds = {
|
thresholds = {
|
||||||
ok = 1
|
ok = 1
|
||||||
@ -13,14 +13,14 @@ resource "datadog_monitor" "Nginx_process" {
|
|||||||
critical = 4
|
critical = 4
|
||||||
}
|
}
|
||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = "${var.apache_nginx_fpm_config["notify_no_data"]}"
|
||||||
|
evaluation_delay = "${var.apache_nginx_fpm_config["delay"]}"
|
||||||
|
new_host_delay = "${var.apache_nginx_fpm_config["delay"]}"
|
||||||
|
renotify_interval = 60
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
timeout_h = 0
|
timeout_h = 0
|
||||||
include_tags = true
|
include_tags = true
|
||||||
locked = false
|
locked = false
|
||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = 300
|
|
||||||
notify_no_data = false
|
|
||||||
renotify_interval = 0
|
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,27 +0,0 @@
|
|||||||
resource "datadog_monitor" "php-fpm_process_idle" {
|
|
||||||
name = "php_fpm busy worker > 99% on {{host.name}}"
|
|
||||||
message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
|
|
||||||
|
|
||||||
|
|
||||||
type = "service check"
|
|
||||||
query = "avg(last_10m):avg:php_fpm.processes.active{\"dd_monitoring:enabled\",\"dd_php_basics:enabled\",\"!dd_custom_php:enabled\"} by {host} / ( avg:php_fpm.processes.idle{\"dd_monitoring:enabled\",\"dd_php_basics:enabled\",\"!dd_custom_php:enabled\"} by {host} + avg:php_fpm.processes.active{\"dd_monitoring:enabled\",\"dd_php_basics:enabled\",\"!dd_custom_php:enabled\"} by {host} ) > 0.99"
|
|
||||||
count = "${var.dd_php_basics == "true" ? 1 : 0 }"
|
|
||||||
|
|
||||||
thresholds {
|
|
||||||
warning = 0.95
|
|
||||||
critical = 0.99
|
|
||||||
}
|
|
||||||
|
|
||||||
notify_no_data = false
|
|
||||||
renotify_interval = 60
|
|
||||||
notify_audit = false
|
|
||||||
timeout_h = 0
|
|
||||||
include_tags = true
|
|
||||||
locked = false
|
|
||||||
require_full_window = true
|
|
||||||
new_host_delay = 300
|
|
||||||
renotify_interval = 0
|
|
||||||
no_data_timeframe = 20
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user