From 71d78bacdec6dd85a13fcee5c911088386aed37a Mon Sep 17 00:00:00 2001 From: Kevin Pecquet Date: Mon, 30 Oct 2017 15:48:26 +0100 Subject: [PATCH 1/3] MON-75 SQL DB monitors init --- cloud/azure/sql-database/README.md | 44 +++++++ cloud/azure/sql-database/inputs.tf | 49 ++++++++ .../monitors-sql-database-basics.tf | 109 ++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 cloud/azure/sql-database/README.md create mode 100644 cloud/azure/sql-database/inputs.tf create mode 100644 cloud/azure/sql-database/monitors-sql-database-basics.tf diff --git a/cloud/azure/sql-database/README.md b/cloud/azure/sql-database/README.md new file mode 100644 index 0000000..5fb0387 --- /dev/null +++ b/cloud/azure/sql-database/README.md @@ -0,0 +1,44 @@ +Azure SQL Database DataDog monitors +============================ + +How to use this module +---------------------- + +``` +module "datadog-monitors-azure-storage" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/sql-database?ref={revision}" + + message = "${module.datadog-message-alerting.alerting-message}" + + environment = "${var.environment}" + client_name = "${var.client_name}" +} +``` + +Purpose +------- +Creates a DataDog monitors with the following checks : + +* CPU High +* Free disk space low +* DTU Consumption high +* SQL deadlocks + +Inputs +------ + +| Name | Type | Default | Required | +|------|:----:|:-------:|:--------:| +| client_name | Client name | string | - | yes | +| delay | Delay in seconds for the metric evaluation | string | `600` | no | +| environment | Architecture environment | string | - | yes | +| message | Message sent when a monitor is triggered | string | - | yes | +| use_filter_tags | Filter the data with service tags if true | string | `false` | no | +| dd_azure_sqldb | string | `disabled` | yes | +| cpu_threshold_warning | string | `85` | no | +| cpu_threshold_critical | string | `90` | no | +| diskspace_threshold_warning | string | `80` | no | +| diskspace_threshold_critical | string | `90` | no | +| dtu_threshold_warning | string | `85` | no | +| dtu_threshold_critical | string | `90` | no | +| deadlock_threshold_critical | string | `1` | no | diff --git a/cloud/azure/sql-database/inputs.tf b/cloud/azure/sql-database/inputs.tf new file mode 100644 index 0000000..77599b9 --- /dev/null +++ b/cloud/azure/sql-database/inputs.tf @@ -0,0 +1,49 @@ +variable "subscription_id" { + default = "" +} + +variable "message" { + description = "Message sent when a SQL DB monitor is triggered" +} + +variable "environment" {} + +variable "use_filter_tags" { + default = "false" +} + +variable "cpu_threshold_warning" { + default = "" +} + +variable "cpu_threshold_critical" { + default = "90" +} + +variable "diskspace_threshold_warning" { + default = "80" +} + +variable "diskspace_threshold_critical" { + default = "90" +} + +variable "dtu_threshold_warning" { + default = "85" +} + +variable "dtu_threshold_critical" { + default = "90" +} + +variable "deadlock_threshold_critical" { + default = "1" +} + +variable "delay" { + default = "600" +} + +variable "dd_azure_sqldb" { + default = "disabled" +} diff --git a/cloud/azure/sql-database/monitors-sql-database-basics.tf b/cloud/azure/sql-database/monitors-sql-database-basics.tf new file mode 100644 index 0000000..413e4020 --- /dev/null +++ b/cloud/azure/sql-database/monitors-sql-database-basics.tf @@ -0,0 +1,109 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_sqldb:enabled,env:%s",var.environment) : "*"}" + } +} + +resource "datadog_monitor" "sql-database_cpu_90_15min" { + name = "[${var.environment}] SQL Database CPU high > 90% for 15 min on {{name}}" + message = "${message}" + + count = "${var.dd_azure_sqldb == "enabled" ? 1 : 0 }" + + query = "avg(last_15m):avg:azure.sql_servers_databases.cpu_percent{${data.template_file.filter.rendered}} by {name,resource_group} > ${var.cpu_threshold_critical}" + type = "query alert" + + thresholds { + critical = "${var.cpu_threshold_critical}" + } + + notify_no_data = true + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "sql-database_free_space_low" { + name = "[${var.environment}] SQL Database free space < 10 % on {{name}}" + message = "${message}" + + type = "query alert" + query = "avg(last_15m):avg:azure.sql_servers_databases.storage_percent{${data.template_file.filter.rendered}} by {name,resource_group} > 90" + + count = "${var.dd_azure_sqldb == "enabled" ? 1 : 0 }" + + thresholds { + warning = "${var.diskspace_threshold_warning}" + critical = "${var.diskspace_threshold_critical}" + } + + notify_no_data = true + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "sql-database_dtu_consumption_high" { + name = "[${var.environment}] DTU Consumption on {{name}} > 90" + message = "${message}" + + type = "query alert" + query = "avg(last_15m):azure.sql_servers_databases.dtu_consumption_percent{${data.template_file.filter.rendered}} by {name,resource_group} > 90" + + count = "${var.dd_azure_sqldb == "enabled" ? 1 : 0 }" + + thresholds { + warning = "${var.dtu_threshold_warning}" + critical = "${var.dtu_threshold_critical}" + } + + notify_no_data = true + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "sql-database_deadlocks_count" { + name = "[${var.environment}] SQL Deadlocks too high on {{name}}" + message = "${message}" + + type = "query alert" + query = "sum(last_5m):avg:azure.sql_servers_databases.deadlock{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() > ${var.deadlock_threshold_critical}" + + count = "${var.dd_azure_sqldb == "enabled" ? 1 : 0 }" + + thresholds { + critical = "${var.deadlock_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} From 220dfe019dec2656687f8eddb25a18146159cb06 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Tue, 7 Nov 2017 11:58:54 +0100 Subject: [PATCH 2/3] MON-75 Add some descriptions and add EOF on queries --- cloud/azure/sql-database/README.md | 29 +++++---- cloud/azure/sql-database/inputs.tf | 65 +++++++++++++------ .../monitors-sql-database-basics.tf | 36 ++++++---- 3 files changed, 84 insertions(+), 46 deletions(-) diff --git a/cloud/azure/sql-database/README.md b/cloud/azure/sql-database/README.md index 5fb0387..f135036 100644 --- a/cloud/azure/sql-database/README.md +++ b/cloud/azure/sql-database/README.md @@ -27,18 +27,19 @@ Creates a DataDog monitors with the following checks : Inputs ------ -| Name | Type | Default | Required | -|------|:----:|:-------:|:--------:| -| client_name | Client name | string | - | yes | +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no | +| cpu_threshold_warning | CPU usage in percent (warning threshold) | string | `` | no | +| deadlock_threshold_critical | Amount of Deadlocks (critical threshold) | string | `1` | no | | delay | Delay in seconds for the metric evaluation | string | `600` | no | -| environment | Architecture environment | string | - | yes | -| message | Message sent when a monitor is triggered | string | - | yes | -| use_filter_tags | Filter the data with service tags if true | string | `false` | no | -| dd_azure_sqldb | string | `disabled` | yes | -| cpu_threshold_warning | string | `85` | no | -| cpu_threshold_critical | string | `90` | no | -| diskspace_threshold_warning | string | `80` | no | -| diskspace_threshold_critical | string | `90` | no | -| dtu_threshold_warning | string | `85` | no | -| dtu_threshold_critical | string | `90` | no | -| deadlock_threshold_critical | string | `1` | no | +| diskspace_threshold_critical | Disk space used in percent (critical threshold) | string | `90` | no | +| diskspace_threshold_warning | Disk space used in percent (warning threshold) | string | `80` | no | +| dtu_threshold_critical | Amount of DTU used (critical threshold) | string | `90` | no | +| dtu_threshold_warning | Amount of DTU used (warning threshold) | string | `85` | no | +| environment | Architecture Environment | string | - | yes | +| message | Message sent when an alert is triggered | string | - | yes | +| provider | Cloud provider which the monitor and its based metric depend on | string | `azure` | no | +| service | Service monitored by this set of monitors | string | `sql-database` | no | +| subscription_id | Azure account id used as filter for monitors | string | - | yes | +| use_filter_tags | Filter the data with service tags if true | string | `true` | no | diff --git a/cloud/azure/sql-database/inputs.tf b/cloud/azure/sql-database/inputs.tf index 77599b9..d3abe46 100644 --- a/cloud/azure/sql-database/inputs.tf +++ b/cloud/azure/sql-database/inputs.tf @@ -1,49 +1,74 @@ +# Global Terraform +variable "environment" { + description = "Architecture Environment" + type = "string" +} + variable "subscription_id" { - default = "" + description = "Azure account id used as filter for monitors" + type = "string" +} + +variable "provider" { + description = "Cloud provider which the monitor and its based metric depend on" + type = "string" + default = "azure" +} + +variable "service" { + description = "Service monitored by this set of monitors" + type = "string" + default = "sql-database" +} + +# Global DataDog +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 600 } variable "message" { - description = "Message sent when a SQL DB monitor is triggered" + description = "Message sent when an alert is triggered" } -variable "environment" {} - variable "use_filter_tags" { - default = "false" + description = "Filter the data with service tags if true" + default = "true" } +# Azure SQL Database specific + variable "cpu_threshold_warning" { - default = "" + description = "CPU usage in percent (warning threshold)" + default = "" } variable "cpu_threshold_critical" { - default = "90" + description = "CPU usage in percent (critical threshold)" + default = "90" } variable "diskspace_threshold_warning" { - default = "80" + description = "Disk space used in percent (warning threshold)" + default = "80" } variable "diskspace_threshold_critical" { - default = "90" + description = "Disk space used in percent (critical threshold)" + default = "90" } variable "dtu_threshold_warning" { - default = "85" + description = "Amount of DTU used (warning threshold)" + default = "85" } variable "dtu_threshold_critical" { - default = "90" + description = "Amount of DTU used (critical threshold)" + default = "90" } variable "deadlock_threshold_critical" { - default = "1" -} - -variable "delay" { - default = "600" -} - -variable "dd_azure_sqldb" { - default = "disabled" + description = "Amount of Deadlocks (critical threshold)" + default = "1" } diff --git a/cloud/azure/sql-database/monitors-sql-database-basics.tf b/cloud/azure/sql-database/monitors-sql-database-basics.tf index 413e4020..5448b8a 100644 --- a/cloud/azure/sql-database/monitors-sql-database-basics.tf +++ b/cloud/azure/sql-database/monitors-sql-database-basics.tf @@ -10,10 +10,13 @@ resource "datadog_monitor" "sql-database_cpu_90_15min" { name = "[${var.environment}] SQL Database CPU high > 90% for 15 min on {{name}}" message = "${message}" - count = "${var.dd_azure_sqldb == "enabled" ? 1 : 0 }" + query = < ${var.cpu_threshold_critical} + EOF - query = "avg(last_15m):avg:azure.sql_servers_databases.cpu_percent{${data.template_file.filter.rendered}} by {name,resource_group} > ${var.cpu_threshold_critical}" - type = "query alert" + type = "query alert" thresholds { critical = "${var.cpu_threshold_critical}" @@ -35,10 +38,13 @@ resource "datadog_monitor" "sql-database_free_space_low" { name = "[${var.environment}] SQL Database free space < 10 % on {{name}}" message = "${message}" - type = "query alert" - query = "avg(last_15m):avg:azure.sql_servers_databases.storage_percent{${data.template_file.filter.rendered}} by {name,resource_group} > 90" + type = "query alert" - count = "${var.dd_azure_sqldb == "enabled" ? 1 : 0 }" + query = < ${var.diskspace_threshold_critical} + EOF thresholds { warning = "${var.diskspace_threshold_warning}" @@ -61,10 +67,13 @@ resource "datadog_monitor" "sql-database_dtu_consumption_high" { name = "[${var.environment}] DTU Consumption on {{name}} > 90" message = "${message}" - type = "query alert" - query = "avg(last_15m):azure.sql_servers_databases.dtu_consumption_percent{${data.template_file.filter.rendered}} by {name,resource_group} > 90" + type = "query alert" - count = "${var.dd_azure_sqldb == "enabled" ? 1 : 0 }" + query = < ${var.dtu_threshold_critical} + EOF thresholds { warning = "${var.dtu_threshold_warning}" @@ -87,10 +96,13 @@ resource "datadog_monitor" "sql-database_deadlocks_count" { name = "[${var.environment}] SQL Deadlocks too high on {{name}}" message = "${message}" - type = "query alert" - query = "sum(last_5m):avg:azure.sql_servers_databases.deadlock{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() > ${var.deadlock_threshold_critical}" + type = "query alert" - count = "${var.dd_azure_sqldb == "enabled" ? 1 : 0 }" + query = < ${var.deadlock_threshold_critical} + EOF thresholds { critical = "${var.deadlock_threshold_critical}" From 28e2e87a93fa5b35d0a10b64086c19120baee530 Mon Sep 17 00:00:00 2001 From: Laurent Piroelle Date: Thu, 23 Nov 2017 17:34:30 +0100 Subject: [PATCH 3/3] MON-75 Normalize monitors --- cloud/azure/sql-database/README.md | 20 ++++++----- cloud/azure/sql-database/inputs.tf | 26 ++++---------- .../monitors-sql-database-basics.tf | 34 ++++++++++++------- 3 files changed, 40 insertions(+), 40 deletions(-) diff --git a/cloud/azure/sql-database/README.md b/cloud/azure/sql-database/README.md index f135036..7d815e3 100644 --- a/cloud/azure/sql-database/README.md +++ b/cloud/azure/sql-database/README.md @@ -1,5 +1,5 @@ Azure SQL Database DataDog monitors -============================ +=================================== How to use this module ---------------------- @@ -8,10 +8,8 @@ How to use this module module "datadog-monitors-azure-storage" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/sql-database?ref={revision}" - message = "${module.datadog-message-alerting.alerting-message}" - + message = "${module.datadog-message-alerting.alerting-message}" environment = "${var.environment}" - client_name = "${var.client_name}" } ``` @@ -38,8 +36,14 @@ Inputs | dtu_threshold_critical | Amount of DTU used (critical threshold) | string | `90` | no | | dtu_threshold_warning | Amount of DTU used (warning threshold) | string | `85` | no | | environment | Architecture Environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | -| provider | Cloud provider which the monitor and its based metric depend on | string | `azure` | no | -| service | Service monitored by this set of monitors | string | `sql-database` | no | -| subscription_id | Azure account id used as filter for monitors | string | - | yes | -| use_filter_tags | Filter the data with service tags if true | string | `true` | no | + +Related documentation +--------------------- + +DataDog documentation: [https://docs.datadoghq.com/integrations/azure_sql_database/](https://docs.datadoghq.com/integrations/azure_sql_database/) + +Azure SQL Database metrics documentation: [https://docs.microsoft.com/en-us/azure/sql-database/saas-dbpertenant-log-analytics](https://docs.microsoft.com/en-us/azure/sql-database/saas-dbpertenant-log-analytics) + diff --git a/cloud/azure/sql-database/inputs.tf b/cloud/azure/sql-database/inputs.tf index d3abe46..9ddab06 100644 --- a/cloud/azure/sql-database/inputs.tf +++ b/cloud/azure/sql-database/inputs.tf @@ -4,23 +4,6 @@ variable "environment" { type = "string" } -variable "subscription_id" { - description = "Azure account id used as filter for monitors" - type = "string" -} - -variable "provider" { - description = "Cloud provider which the monitor and its based metric depend on" - type = "string" - default = "azure" -} - -variable "service" { - description = "Service monitored by this set of monitors" - type = "string" - default = "sql-database" -} - # Global DataDog variable "delay" { description = "Delay in seconds for the metric evaluation" @@ -31,11 +14,16 @@ variable "message" { description = "Message sent when an alert is triggered" } -variable "use_filter_tags" { - description = "Filter the data with service tags if true" +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" default = "true" } +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + # Azure SQL Database specific variable "cpu_threshold_warning" { diff --git a/cloud/azure/sql-database/monitors-sql-database-basics.tf b/cloud/azure/sql-database/monitors-sql-database-basics.tf index 5448b8a..1e75813 100644 --- a/cloud/azure/sql-database/monitors-sql-database-basics.tf +++ b/cloud/azure/sql-database/monitors-sql-database-basics.tf @@ -2,13 +2,13 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_sqldb:enabled,env:%s",var.environment) : "*"}" + filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_sqldatabase:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } resource "datadog_monitor" "sql-database_cpu_90_15min" { - name = "[${var.environment}] SQL Database CPU high > 90% for 15 min on {{name}}" - message = "${message}" + name = "[${var.environment}] SQL Database CPU high > ${var.cpu_threshold_critical}% on {{name}}" + message = "${var.message}" query = < ${var.cpu_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { critical = "${var.cpu_threshold_critical}" @@ -32,13 +32,15 @@ resource "datadog_monitor" "sql-database_cpu_90_15min" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}", "resource:sqldatabase", "team:azure", "provider:azure"] } resource "datadog_monitor" "sql-database_free_space_low" { - name = "[${var.environment}] SQL Database free space < 10 % on {{name}}" - message = "${message}" + name = "[${var.environment}] SQL Database free space < ${var.diskspace_threshold_critical}% on {{name}}" + message = "${var.message}" - type = "query alert" + type = "metric alert" query = <