From 57b2cbc42549a3a6446686fc7dc556606e3ce27c Mon Sep 17 00:00:00 2001
From: vincent EL KHATIB
Date: Fri, 7 Jul 2017 14:31:19 +0200
Subject: [PATCH 1/3] MON-27 added apache nginx php

---
 monitors-apache-basics.tf | 80 +++++++++++++++++++++++++++++++++++++++
 monitors-nginx-basics.tf  | 26 +++++++++++++
 monitors-php-basics.tf    | 27 +++++++++++++
 3 files changed, 133 insertions(+)
 create mode 100644 monitors-apache-basics.tf
 create mode 100644 monitors-nginx-basics.tf
 create mode 100644 monitors-php-basics.tf

diff --git a/monitors-apache-basics.tf b/monitors-apache-basics.tf
new file mode 100644
index 0000000..e908ec3
--- /dev/null
+++ b/monitors-apache-basics.tf
@@ -0,0 +1,80 @@
+resource "datadog_monitor" "apache_process" {
+  name = "Apache process is down on {{host.name}}"
+  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
+
+
+  type = "service check"
+  query = "\"process.up\".over(\"dd_monitoring:enabled\",\"dd_apache_basics:enabled\",\"process:apache2\").exclude(\"dd_custom_apache:enabled\").last(4).count_by_status()"
+  count = "${var.apache_basics == "true" ? 1 : 0 }"
+
+  thresholds = {
+    ok = 1
+    warning = 2
+    critical = 4
+  }
+
+  notify_no_data = false
+  notify_audit = false
+  timeout_h = 0
+  include_tags = true
+  locked = false
+  require_full_window = true
+  new_host_delay = 300
+  notify_no_data = false
+  renotify_interval = 0
+  no_data_timeframe = 20
+}
+
+resource "datadog_monitor" "apache_worker_nat" {
+  name = "Apache proxy busy worker > 99% on {{host.name}}"
+  message = "{{#is_alert}}\n${var.hno_escalation_group}\n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
+
+  query = "avg(last_10m):avg:php_fpm.process.active{\"dd_monitoring:enabled\",\"dd_apache_basics:enabled\",\"!dd_custom_apache:enabled\"} by {host} / ( avg:php_fpm.process.idle{\"dd_monitoring:enabled\",\"dd_apache_basics:enabled\",\"!dd_custom_apache:enabled\"} by {host} + avg:apache.performance.busy_workers{\"dd_monitoring:enabled\",\"dd_apache_basics:enabled\",\"!dd_custom_apache:enabled\"} by {host} ) > 0.99"
+  type = "query alert"
+
+  count = "${var.apache_basics == "true" ? 1 : 0 }"
+
+  thresholds {
+    warning = 0.95
+    critical = 0.99
+  }
+
+  notify_no_data = false
+  renotify_interval = 60
+  notify_audit = false
+  timeout_h = 0
+  include_tags = true
+  locked = false
+  require_full_window = true
+  new_host_delay = 300
+  notify_no_data = false
+  renotify_interval = 0
+  no_data_timeframe = 20
+}
+
+resource "datadog_monitor" "apache_can_connect" {
+  name = "Apache proxy is down on {{host.name}}"
+  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
+
+  query = "\"apache.can_connect\".over(\"dd_monitoring:enabled\",\"dd_apache_basics:enabled\",\"process:apache2\").exclude(\"dd_custom_apache:enabled\").last(1).count_by_status()"
+  type = "service check"
+
+  count = "${var.apache_basics == "true" ? 1 : 0 }"
+
+  notify_no_data = false
+  renotify_interval = 60
+  notify_audit = false
+  timeout_h = 0
+  include_tags = true
+  locked = false
+  require_full_window = true
+  new_host_delay = 300
+  notify_no_data = false
+  renotify_interval = 0
+  no_data_timeframe = 20
+}
+
+
+// enhanced monitoring, given that... (unfinished note)
+
+
diff --git a/monitors-nginx-basics.tf b/monitors-nginx-basics.tf
new file mode 100644
index 0000000..33604cd
--- /dev/null
+++ b/monitors-nginx-basics.tf
@@ -0,0 +1,26 @@
+resource "datadog_monitor" "Nginx_process" {
+  name = "Nginx process is down on {{host.name}}"
+  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
+
+
+  type = "service check"
+  query = "\"process.up\".over(\"dd_monitoring:enabled\",\"process:nginx\").exclude(\"dd_custom_nginx:enabled\").last(4).count_by_status()"
+  count = "${var.nginx_basics == "true" ? 1 : 0 }"
+
+  thresholds = {
+    ok = 1
+    warning = 2
+    critical = 4
+  }
+
+  notify_no_data = false
+  notify_audit = false
+  timeout_h = 0
+  include_tags = true
+  locked = false
+  require_full_window = true
+  new_host_delay = 300
+  notify_no_data = false
+  renotify_interval = 0
+  no_data_timeframe = 20
+}
diff --git a/monitors-php-basics.tf b/monitors-php-basics.tf
new file mode 100644
index 0000000..99bb696
--- /dev/null
+++ b/monitors-php-basics.tf
@@ -0,0 +1,27 @@
+resource "datadog_monitor" "php-fpm_process_idle" {
+  name = "php_fpm busy worker > 99% on {{host.name}}"
+  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
+
+
+  type = "service check"
+  query = "avg(last_10m):avg:php_fpm.processes.active{\"dd_monitoring:enabled\",\"dd_php_basics:enabled\",\"!dd_custom_php:enabled\"} by {host} / ( avg:php_fpm.processes.idle{\"dd_monitoring:enabled\",\"dd_php_basics:enabled\",\"!dd_custom_php:enabled\"} by {host} + avg:php_fpm.processes.active{\"dd_monitoring:enabled\",\"dd_php_basics:enabled\",\"!dd_custom_php:enabled\"} by {host} ) > 0.99"
+  count = "${var.apache_basics == "true" ? 1 : 0 }"
+
+  thresholds {
+    warning = 0.95
+    critical = 0.99
+  }
+
+  notify_no_data = false
+  renotify_interval = 60
+  notify_audit = false
+  timeout_h = 0
+  include_tags = true
+  locked = false
+  require_full_window = true
+  new_host_delay = 300
+  renotify_interval = 0
+  no_data_timeframe = 20
+}
+
+
From 35037922dd412f17fff6d92acd6d1c805eacfb98 Mon Sep 17 00:00:00 2001
From: vincent EL KHATIB
Date: Fri, 7 Jul 2017 17:33:32 +0200
Subject: [PATCH 2/3] MON-30 add rds mysql

---
 inputs.tf                    |  5 ++++
 monitors-rds_mysql-basics.tf | 58 ++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)
 create mode 100644 monitors-rds_mysql-basics.tf

diff --git a/inputs.tf b/inputs.tf
index 3959621..9023a2c 100644
--- a/inputs.tf
+++ b/inputs.tf
@@ -11,3 +11,8 @@ variable "dd_custom_cpu" {
     status = "enabled"
   }
 }
+
+# Set to "true" to create the RDS MySQL monitors; any other value leaves them out.
+variable "rds-mysql_basics" {
+  default = "disabled"
+}
diff --git a/monitors-rds_mysql-basics.tf b/monitors-rds_mysql-basics.tf
new file mode 100644
index 0000000..fca6a0d
--- /dev/null
+++ b/monitors-rds_mysql-basics.tf
@@ -0,0 +1,58 @@
+# Datadog monitors for RDS MySQL.
+# Each monitor is created only when var.rds-mysql_basics is "true". Queries are
+# scoped to dd_monitoring:enabled and dd_rds-mysql_basics:enabled, and exclude
+# anything tagged dd_custom_rds-mysql:enabled.
+
+# CPU: warning above 80, critical above 90, averaged over the last 15 minutes.
+resource "datadog_monitor" "rds-mysql_cpu_80_15min" {
+  name    = "RDS Mysql CPU High > 90% for 15 min"
+  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
+  count   = "${var.rds-mysql_basics == "true" ? 1 : 0 }"
+
+  query = "avg(last_15m):avg:aws.rds.cpuutilization.total{\"dd_monitoring:enabled\",\"dd_rds-mysql_basics:enabled\",\"!dd_custom_rds-mysql:enabled\"} by {host} > 90"
+  type  = "query alert"
+
+  # Thresholds use the query's scale; critical must equal the value compared in the query.
+  thresholds {
+    warning  = 80
+    critical = 90
+  }
+
+  notify_no_data      = false
+  notify_audit        = false
+  timeout_h           = 0
+  include_tags        = true
+  locked              = false
+  require_full_window = true
+  new_host_delay      = 300
+  # NOTE(review): renotify_interval was set twice here (60, then 0); the final value is kept - confirm 0 is intended.
+  renotify_interval   = 0
+  no_data_timeframe   = 20
+}
+
+# Free storage: warning below 20 % free, critical below 10 % free, averaged over the last 10 minutes.
+resource "datadog_monitor" "mysql_rds_free_space_low" {
+  name    = "rds mysql free space low < 10 % on {{host.name}}"
+  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
+
+  # Ratio of free storage to total storage, so 0.1 means 10 % of the volume is still free.
+  query = "avg(last_10m):avg:aws.rds.free_storage_space{\"dd_monitoring:enabled\",\"dd_rds-mysql_basics:enabled\",\"!dd_custom_rds-mysql:enabled\"} by {host} / avg:aws.rds.total_storage_space{\"dd_monitoring:enabled\",\"dd_rds-mysql_basics:enabled\",\"!dd_custom_rds-mysql:enabled\"} by {host} < 0.1"
+  type  = "query alert"
+  count = "${var.rds-mysql_basics == "true" ? 1 : 0 }"
+
+  thresholds {
+    warning  = 0.2
+    critical = 0.1
+  }
+
+  notify_no_data      = false
+  notify_audit        = false
+  timeout_h           = 0
+  include_tags        = true
+  locked              = false
+  require_full_window = true
+  new_host_delay      = 300
+  # NOTE(review): renotify_interval was set twice here (60, then 0); the final value is kept - confirm 0 is intended.
+  renotify_interval   = 0
+  no_data_timeframe   = 20
+}
From adf166a3c108888f6c5876bf156b6bf4fe113ae2 Mon Sep 17 00:00:00 2001
From: vincent EL KHATIB
Date: Mon, 10 Jul 2017 11:14:58 +0200
Subject: [PATCH 3/3] MON-30 fix files

---
 monitors-apache-basics.tf | 80 ---------------------------------------
 monitors-nginx-basics.tf  | 26 -------------
 monitors-php-basics.tf    | 27 -------------
 3 files changed, 133 deletions(-)
 delete mode 100644 monitors-apache-basics.tf
 delete mode 100644 monitors-nginx-basics.tf
 delete mode 100644 monitors-php-basics.tf

diff --git a/monitors-apache-basics.tf b/monitors-apache-basics.tf
deleted file mode 100644
index e908ec3..0000000
--- a/monitors-apache-basics.tf
+++ /dev/null
@@ -1,80 +0,0 @@
-resource "datadog_monitor" "apache_process" {
-  name = "Apache process is down on {{host.name}}"
-  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
-
-
-  type = "service check"
-  query = "\"process.up\".over(\"dd_monitoring:enabled\",\"dd_apache_basics:enabled\",\"process:apache2\").exclude(\"dd_custom_apache:enabled\").last(4).count_by_status()"
-  count = "${var.apache_basics == "true" ? 1 : 0 }"
-
-  thresholds = {
-    ok = 1
-    warning = 2
-    critical = 4
-  }
-
-  notify_no_data = false
-  notify_audit = false
-  timeout_h = 0
-  include_tags = true
-  locked = false
-  require_full_window = true
-  new_host_delay = 300
-  notify_no_data = false
-  renotify_interval = 0
-  no_data_timeframe = 20
-}
-
-resource "datadog_monitor" "apache_worker_nat" {
-  name = "Apache proxy busy worker > 99% on {{host.name}}"
-  message = "{{#is_alert}}\n${var.hno_escalation_group}\n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
-
-  query = "avg(last_10m):avg:php_fpm.process.active{\"dd_monitoring:enabled\",\"dd_apache_basics:enabled\",\"!dd_custom_apache:enabled\"} by {host} / ( avg:php_fpm.process.idle{\"dd_monitoring:enabled\",\"dd_apache_basics:enabled\",\"!dd_custom_apache:enabled\"} by {host} + avg:apache.performance.busy_workers{\"dd_monitoring:enabled\",\"dd_apache_basics:enabled\",\"!dd_custom_apache:enabled\"} by {host} ) > 0.99"
-  type = "query alert"
-
-  count = "${var.apache_basics == "true" ? 1 : 0 }"
-
-  thresholds {
-    warning = 0.95
-    critical = 0.99
-  }
-
-  notify_no_data = false
-  renotify_interval = 60
-  notify_audit = false
-  timeout_h = 0
-  include_tags = true
-  locked = false
-  require_full_window = true
-  new_host_delay = 300
-  notify_no_data = false
-  renotify_interval = 0
-  no_data_timeframe = 20
-}
-
-resource "datadog_monitor" "apache_can_connect" {
-  name = "Apache proxy is down on {{host.name}}"
-  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
-
-  query = "\"apache.can_connect\".over(\"dd_monitoring:enabled\",\"dd_apache_basics:enabled\",\"process:apache2\").exclude(\"dd_custom_apache:enabled\").last(1).count_by_status()"
-  type = "service check"
-
-  count = "${var.apache_basics == "true" ? 1 : 0 }"
-
-  notify_no_data = false
-  renotify_interval = 60
-  notify_audit = false
-  timeout_h = 0
-  include_tags = true
-  locked = false
-  require_full_window = true
-  new_host_delay = 300
-  notify_no_data = false
-  renotify_interval = 0
-  no_data_timeframe = 20
-}
-
-
-// enhanced monitoring, given that... (unfinished note)
-
-
diff --git a/monitors-nginx-basics.tf b/monitors-nginx-basics.tf
deleted file mode 100644
index 33604cd..0000000
--- a/monitors-nginx-basics.tf
+++ /dev/null
@@ -1,26 +0,0 @@
-resource "datadog_monitor" "Nginx_process" {
-  name = "Nginx process is down on {{host.name}}"
-  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
-
-
-  type = "service check"
-  query = "\"process.up\".over(\"dd_monitoring:enabled\",\"process:nginx\").exclude(\"dd_custom_nginx:enabled\").last(4).count_by_status()"
-  count = "${var.nginx_basics == "true" ? 1 : 0 }"
-
-  thresholds = {
-    ok = 1
-    warning = 2
-    critical = 4
-  }
-
-  notify_no_data = false
-  notify_audit = false
-  timeout_h = 0
-  include_tags = true
-  locked = false
-  require_full_window = true
-  new_host_delay = 300
-  notify_no_data = false
-  renotify_interval = 0
-  no_data_timeframe = 20
-}
diff --git a/monitors-php-basics.tf b/monitors-php-basics.tf
deleted file mode 100644
index 99bb696..0000000
--- a/monitors-php-basics.tf
+++ /dev/null
@@ -1,27 +0,0 @@
-resource "datadog_monitor" "php-fpm_process_idle" {
-  name = "php_fpm busy worker > 99% on {{host.name}}"
-  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}\n{{#is_warning}}\n${var.ho_escalation_group} \n{{/is_warning}} \n{{#is_warning_recovery}}\n${var.ho_escalation_group}\n{{/is_warning_recovery}}"
-
-
-  type = "service check"
-  query = "avg(last_10m):avg:php_fpm.processes.active{\"dd_monitoring:enabled\",\"dd_php_basics:enabled\",\"!dd_custom_php:enabled\"} by {host} / ( avg:php_fpm.processes.idle{\"dd_monitoring:enabled\",\"dd_php_basics:enabled\",\"!dd_custom_php:enabled\"} by {host} + avg:php_fpm.processes.active{\"dd_monitoring:enabled\",\"dd_php_basics:enabled\",\"!dd_custom_php:enabled\"} by {host} ) > 0.99"
-  count = "${var.apache_basics == "true" ? 1 : 0 }"
-
-  thresholds {
-    warning = 0.95
-    critical = 0.99
-  }
-
-  notify_no_data = false
-  renotify_interval = 60
-  notify_audit = false
-  timeout_h = 0
-  include_tags = true
-  locked = false
-  require_full_window = true
-  new_host_delay = 300
-  renotify_interval = 0
-  no_data_timeframe = 20
-}
-
-