From 8bf6f4154341218c36bc1fec21931a93d56df5d6 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Thu, 22 Feb 2018 12:13:05 +0100 Subject: [PATCH] MON-96 - Updated all monitors to follow recommendations --- cloud/aws/elasticsearch/monitors-elasticsearch.tf | 4 ++-- cloud/aws/rds/monitors-rds.tf | 4 ++-- middleware/apache/README.md | 1 - middleware/apache/inputs.tf | 4 ---- middleware/apache/monitors-apache.tf | 8 ++++---- middleware/nginx/README.md | 1 - middleware/nginx/inputs.tf | 4 ---- middleware/nginx/monitors-nginx.tf | 8 ++++---- middleware/php-fpm/monitors-fpm.tf | 12 ++++++------ system/generic/monitors-custom-cpu.tf | 8 ++++---- system/linux/monitors-linux-basics.tf | 4 ++-- 11 files changed, 24 insertions(+), 34 deletions(-) diff --git a/cloud/aws/elasticsearch/monitors-elasticsearch.tf b/cloud/aws/elasticsearch/monitors-elasticsearch.tf index 8880a02..5a55f4b 100644 --- a/cloud/aws/elasticsearch/monitors-elasticsearch.tf +++ b/cloud/aws/elasticsearch/monitors-elasticsearch.tf @@ -33,7 +33,7 @@ EOF timeout_h = 0 include_tags = true locked = false - require_full_window = true + require_full_window = false new_host_delay = "${var.evaluation_delay}" no_data_timeframe = 20 @@ -66,7 +66,7 @@ EOF timeout_h = 0 include_tags = true locked = false - require_full_window = true + require_full_window = false new_host_delay = "${var.evaluation_delay}" no_data_timeframe = 20 diff --git a/cloud/aws/rds/monitors-rds.tf b/cloud/aws/rds/monitors-rds.tf index dffb178..430cf37 100644 --- a/cloud/aws/rds/monitors-rds.tf +++ b/cloud/aws/rds/monitors-rds.tf @@ -30,7 +30,7 @@ EOF timeout_h = 0 include_tags = true locked = false - require_full_window = true + require_full_window = false new_host_delay = "${var.evaluation_delay}" no_data_timeframe = 20 @@ -62,7 +62,7 @@ EOF timeout_h = 0 include_tags = true locked = false - require_full_window = true + require_full_window = false new_host_delay = "${var.evaluation_delay}" no_data_timeframe = 20 diff --git 
a/middleware/apache/README.md b/middleware/apache/README.md index 4d9c00f..aaf5630 100644 --- a/middleware/apache/README.md +++ b/middleware/apache/README.md @@ -24,7 +24,6 @@ Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| dd_apache | | string | `disabled` | no | | environment | Architecture Environment | string | - | yes | | evaluation_delay | Delay in seconds for the metric evaluation | string | `15` | no | | message | Message sent when an alert is triggered | string | - | yes | diff --git a/middleware/apache/inputs.tf b/middleware/apache/inputs.tf index 0c3b10e..ca3d259 100644 --- a/middleware/apache/inputs.tf +++ b/middleware/apache/inputs.tf @@ -15,7 +15,3 @@ variable "message" { } # Apache Middleware specific - -variable "dd_apache" { - default = "disabled" -} diff --git a/middleware/apache/monitors-apache.tf b/middleware/apache/monitors-apache.tf index 11f4624..2fc3126 100644 --- a/middleware/apache/monitors-apache.tf +++ b/middleware/apache/monitors-apache.tf @@ -3,7 +3,7 @@ resource "datadog_monitor" "Apache_process" { message = "${var.message}" type = "service check" - query = "\"process.up\".over(\"dd_monitoring:enabled\",\"dd_apache:enabled\",\"process:apache\",\"env:${var.environment}\").by(\"host\",\"process\", \"app\").last(4).count_by_status()" + query = "\"apache.can_connect\".over(\"dd_monitoring:enabled\",\"dd_apache:enabled\",\"process:apache\",\"env:${var.environment}\").by(\"host\",\"port\").last(6).count_by_status()" thresholds = { ok = 1 @@ -11,10 +11,10 @@ resource "datadog_monitor" "Apache_process" { critical = 4 } - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" new_host_delay = "${var.evaluation_delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true @@ -22,5 +22,5 @@ resource "datadog_monitor" "Apache_process" { require_full_window = true no_data_timeframe = 20 - tags = 
["env:${var.environment}", "type:apache"] + tags = ["env:${var.environment}", "type:resource"] } diff --git a/middleware/nginx/README.md b/middleware/nginx/README.md index 435dde6..ccf7772 100644 --- a/middleware/nginx/README.md +++ b/middleware/nginx/README.md @@ -24,7 +24,6 @@ Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| dd_nginx | | string | `disabled` | no | | environment | Architecture Environment | string | - | yes | | evaluation_delay | Delay in seconds for the metric evaluation | string | `15` | no | | message | Message sent when an alert is triggered | string | - | yes | diff --git a/middleware/nginx/inputs.tf b/middleware/nginx/inputs.tf index f823913..ea23988 100644 --- a/middleware/nginx/inputs.tf +++ b/middleware/nginx/inputs.tf @@ -15,7 +15,3 @@ variable "message" { } # Nginx Middleware specific - -variable "dd_nginx" { - default = "disabled" -} diff --git a/middleware/nginx/monitors-nginx.tf b/middleware/nginx/monitors-nginx.tf index b018ec3..a569c60 100644 --- a/middleware/nginx/monitors-nginx.tf +++ b/middleware/nginx/monitors-nginx.tf @@ -3,7 +3,7 @@ resource "datadog_monitor" "Nginx_process" { message = "${var.message}" type = "service check" - query = "\"process.up\".over(\"dd_monitoring:enabled\",\"dd_nginx:enabled\",\"process:nginx\",\"env:${var.environment}\").by(\"host\",\"process\", \"app\").last(4).count_by_status()" + query = "\"nginx.can_connect\".over(\"dd_monitoring:enabled\",\"dd_nginx:enabled\",\"process:nginx\",\"env:${var.environment}\").by(\"host\",\"port\").last(6).count_by_status()" thresholds = { ok = 1 @@ -11,10 +11,10 @@ resource "datadog_monitor" "Nginx_process" { critical = 4 } - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" new_host_delay = "${var.evaluation_delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true @@ -22,5 +22,5 @@ resource "datadog_monitor" 
"Nginx_process" { require_full_window = true no_data_timeframe = 20 - tags = ["env:${var.environment}", "type:nginx"] + tags = ["env:${var.environment}", "type:resource"] } diff --git a/middleware/php-fpm/monitors-fpm.tf b/middleware/php-fpm/monitors-fpm.tf index 95289e8..2b06108 100644 --- a/middleware/php-fpm/monitors-fpm.tf +++ b/middleware/php-fpm/monitors-fpm.tf @@ -2,7 +2,7 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_rds:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_php_fpm:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } @@ -25,7 +25,7 @@ resource "datadog_monitor" "php-fpm_process_idle" { critical = "${var.php_fpm_busy_threshold_critical}" } - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay_metric}" new_host_delay = "${var.evaluation_delay_metric}" notify_audit = false @@ -35,7 +35,7 @@ resource "datadog_monitor" "php-fpm_process_idle" { require_full_window = true no_data_timeframe = 20 - tags = ["env:${var.environment}", "type:php-fpm"] + tags = ["env:${var.environment}", "type:resource"] } resource "datadog_monitor" "FPM_process" { @@ -43,7 +43,7 @@ resource "datadog_monitor" "FPM_process" { message = "${var.message}" type = "service check" - query = "\"process.up\".over(\"dd_monitoring:enabled\",\"dd_php_fpm:enabled\",\"process:php_fpm\",\"env:${var.environment}\").by(\"host\",\"process\", \"app\").last(4).count_by_status()" + query = "\"php_fpm.can_ping\".over(\"dd_monitoring:enabled\",\"dd_php_fpm:enabled\",\"process:php_fpm\",\"env:${var.environment}\").by(\"host\",\"port\").last(6).count_by_status()" thresholds = { ok = 1 @@ -54,7 +54,7 @@ resource "datadog_monitor" "FPM_process" { notify_no_data = true evaluation_delay = "${var.evaluation_delay_service}" 
new_host_delay = "${var.evaluation_delay_service}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true @@ -62,5 +62,5 @@ resource "datadog_monitor" "FPM_process" { require_full_window = true no_data_timeframe = 20 - tags = ["env:${var.environment}", "type:php-fpm"] + tags = ["env:${var.environment}", "type:resource"] } diff --git a/system/generic/monitors-custom-cpu.tf b/system/generic/monitors-custom-cpu.tf index 57b9b03..7f862cf 100644 --- a/system/generic/monitors-custom-cpu.tf +++ b/system/generic/monitors-custom-cpu.tf @@ -2,7 +2,7 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_rds:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_system:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } @@ -12,8 +12,8 @@ resource "datadog_monitor" "cpu_custom" { query = < ${var.custom_cpu_threshold_critical}" EOF @@ -27,7 +27,7 @@ resource "datadog_monitor" "cpu_custom" { notify_no_data = true evaluation_delay = "${var.evaluation_delay}" new_host_delay = "${var.evaluation_delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true diff --git a/system/linux/monitors-linux-basics.tf b/system/linux/monitors-linux-basics.tf index 6bd173d..d5762fe 100644 --- a/system/linux/monitors-linux-basics.tf +++ b/system/linux/monitors-linux-basics.tf @@ -2,7 +2,7 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_rds:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + filter = "${var.filter_tags_use_defaults == "true" ? 
format("dd_monitoring:enabled,dd_aws_linux:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } @@ -122,7 +122,7 @@ resource "datadog_monitor" "datadog_free_memory" { notify_no_data = true evaluation_delay = "${var.evaluation_delay}" new_host_delay = "${var.evaluation_delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true