diff --git a/inputs.tf b/inputs.tf
index 02cf08f..03b1829 100644
--- a/inputs.tf
+++ b/inputs.tf
@@ -49,6 +49,7 @@ variable "rds_mem_threshold" {
   }
 }
 
+
 ## ELB
 variable "dd_aws_elb" {
   default = "disable"
@@ -77,4 +78,36 @@ variable "elb_backend_latency" {
     warning = 1000
     critical = 5000
   }
+}
+
+## Apache / Nginx / PHP-FPM
+# Each switch must be set to "enabled" for the matching monitors to be created.
+variable "dd_nginx" {
+  default = "disabled"
+}
+
+variable "dd_php_fpm" {
+  default = "disabled"
+}
+
+variable "dd_apache" {
+  default = "disabled"
+}
+
+# Settings shared by the Apache, Nginx and PHP-FPM monitors.
+variable "apache_nginx_fpm_config" {
+  type = "map"
+  default = {
+    notify_no_data = false
+    delay = 900
+  }
+}
+
+# Warning/critical thresholds for the PHP-FPM busy-worker ratio monitor.
+variable "php_fpm_busy_threshold" {
+  type = "map"
+  default = {
+    warning = 0.8
+    critical = 0.9
+  }
 }
\ No newline at end of file
diff --git a/monitors_apache.tf b/monitors_apache.tf
new file mode 100644
index 0000000..b318de2
--- /dev/null
+++ b/monitors_apache.tf
@@ -0,0 +1,28 @@
+# Service-check monitor on "process.up" for process:apache on hosts tagged dd_apache:enabled.
+# Created only when var.dd_apache is "enabled".
+resource "datadog_monitor" "Apache_process" {
+  name = "[${var.env}] Apache process is down on {{host.name}}"
+  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
+
+  type = "service check"
+  query = "\"process.up\".over(\"dd_monitoring:enabled\",\"dd_apache:enabled\",\"process:apache\",\"env:${var.env}\").by(\"host\",\"process\", \"app\").last(4).count_by_status()"
+  count = "${var.dd_apache == "enabled" ? 1 : 0 }"
+
+  # Status-count thresholds applied to the check results selected by the query.
+  thresholds = {
+    ok = 1
+    warning = 2
+    critical = 4
+  }
+
+  notify_no_data = "${var.apache_nginx_fpm_config["notify_no_data"]}"
+  evaluation_delay = "${var.apache_nginx_fpm_config["delay"]}"
+  new_host_delay = "${var.apache_nginx_fpm_config["delay"]}"
+  renotify_interval = 60
+  notify_audit = false
+  timeout_h = 0
+  include_tags = true
+  locked = false
+  require_full_window = true
+  no_data_timeframe = 20
+}
diff --git a/monitors_fpm.tf b/monitors_fpm.tf
new file mode 100644
index 0000000..cff10a0
--- /dev/null
+++ b/monitors_fpm.tf
@@ -0,0 +1,56 @@
+# Query alert on the PHP-FPM busy-worker ratio: active / (idle + active).
+# Created only when var.dd_php_fpm is "enabled".
+resource "datadog_monitor" "php-fpm_process_idle" {
+  name = "[${var.env}] php_fpm busy worker > 90% on {{host.name}}"
+  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
+
+  type = "query alert"
+  # All three series use the same "by" grouping so the arithmetic lines up per group.
+  # The comparison value is taken from the critical threshold so the two cannot drift apart.
+  query = "avg(last_10m):avg:php_fpm.processes.active{dd_monitoring:enabled,dd_php_fpm:enabled,env:${var.env}} by {host,region,app} / ( avg:php_fpm.processes.idle{dd_monitoring:enabled,dd_php_fpm:enabled,env:${var.env}} by {host,region,app} + avg:php_fpm.processes.active{dd_monitoring:enabled,dd_php_fpm:enabled,env:${var.env}} by {host,region,app} ) > ${var.php_fpm_busy_threshold["critical"]}"
+  count = "${var.dd_php_fpm == "enabled" ? 1 : 0 }"
+
+  thresholds {
+    warning = "${var.php_fpm_busy_threshold["warning"]}"
+    critical = "${var.php_fpm_busy_threshold["critical"]}"
+  }
+
+  notify_no_data = "${var.apache_nginx_fpm_config["notify_no_data"]}"
+  evaluation_delay = "${var.apache_nginx_fpm_config["delay"]}"
+  new_host_delay = "${var.apache_nginx_fpm_config["delay"]}"
+  renotify_interval = 60
+  notify_audit = false
+  timeout_h = 0
+  include_tags = true
+  locked = false
+  require_full_window = true
+  no_data_timeframe = 20
+}
+
+# Service-check monitor on "process.up" for process:php_fpm on hosts tagged dd_php_fpm:enabled.
+# Created only when var.dd_php_fpm is "enabled".
+resource "datadog_monitor" "FPM_process" {
+  name = "[${var.env}] FPM process is down on {{host.name}}"
+  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
+
+  type = "service check"
+  query = "\"process.up\".over(\"dd_monitoring:enabled\",\"dd_php_fpm:enabled\",\"process:php_fpm\",\"env:${var.env}\").by(\"host\",\"process\", \"app\").last(4).count_by_status()"
+  count = "${var.dd_php_fpm == "enabled" ? 1 : 0 }"
+
+  thresholds = {
+    ok = 1
+    warning = 2
+    critical = 4
+  }
+
+  notify_no_data = "${var.apache_nginx_fpm_config["notify_no_data"]}"
+  evaluation_delay = "${var.apache_nginx_fpm_config["delay"]}"
+  new_host_delay = "${var.apache_nginx_fpm_config["delay"]}"
+  renotify_interval = 60
+  notify_audit = false
+  timeout_h = 0
+  include_tags = true
+  locked = false
+  require_full_window = true
+  no_data_timeframe = 20
+}
diff --git a/monitors_nginx.tf b/monitors_nginx.tf
new file mode 100644
index 0000000..01a95fe
--- /dev/null
+++ b/monitors_nginx.tf
@@ -0,0 +1,28 @@
+# Service-check monitor on "process.up" for process:nginx on hosts tagged dd_nginx:enabled.
+# Created only when var.dd_nginx is "enabled".
+resource "datadog_monitor" "Nginx_process" {
+  name = "[${var.env}] Nginx process is down on {{host.name}}"
+  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
+
+  type = "service check"
+  query = "\"process.up\".over(\"dd_monitoring:enabled\",\"dd_nginx:enabled\",\"process:nginx\",\"env:${var.env}\").by(\"host\",\"process\", \"app\").last(4).count_by_status()"
+  count = "${var.dd_nginx == "enabled" ? 1 : 0 }"
+
+  # Status-count thresholds applied to the check results selected by the query.
+  thresholds = {
+    ok = 1
+    warning = 2
+    critical = 4
+  }
+
+  notify_no_data = "${var.apache_nginx_fpm_config["notify_no_data"]}"
+  evaluation_delay = "${var.apache_nginx_fpm_config["delay"]}"
+  new_host_delay = "${var.apache_nginx_fpm_config["delay"]}"
+  renotify_interval = 60
+  notify_audit = false
+  timeout_h = 0
+  include_tags = true
+  locked = false
+  require_full_window = true
+  no_data_timeframe = 20
+}