Merged in MON-75-azure-sql-monitors (pull request #14)

MON-75: Azure SQL Databases monitors

Approved-by: Laurent Piroelle <laurent.piroelle@fr.clara.net>
Approved-by: Alexandre Gaillet <alexandre.gaillet@fr.clara.net>
This commit is contained in:
Kevin Pecquet 2017-11-27 14:21:23 +00:00 committed by Jérôme Respaut
commit ffa059cee5
3 changed files with 240 additions and 0 deletions

View File

@ -0,0 +1,49 @@
Azure SQL Database DataDog monitors
===================================
How to use this module
----------------------
```
module "datadog-monitors-azure-sql-database" {
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/sql-database?ref={revision}"
message = "${module.datadog-message-alerting.alerting-message}"
environment = "${var.environment}"
}
```
Purpose
-------
Creates DataDog monitors with the following checks:
* CPU High
* Free disk space low
* DTU Consumption high
* SQL deadlocks
Inputs
------
| Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:|
| cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no |
| cpu_threshold_warning | CPU usage in percent (warning threshold) | string | `` | no |
| deadlock_threshold_critical | Amount of Deadlocks (critical threshold) | string | `1` | no |
| delay | Delay in seconds for the metric evaluation | string | `600` | no |
| diskspace_threshold_critical | Disk space used in percent (critical threshold) | string | `90` | no |
| diskspace_threshold_warning | Disk space used in percent (warning threshold) | string | `80` | no |
| dtu_threshold_critical | Amount of DTU used (critical threshold) | string | `90` | no |
| dtu_threshold_warning | Amount of DTU used (warning threshold) | string | `85` | no |
| environment | Architecture Environment | string | - | yes |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| message | Message sent when an alert is triggered | string | - | yes |
Related documentation
---------------------
DataDog documentation: [https://docs.datadoghq.com/integrations/azure_sql_database/](https://docs.datadoghq.com/integrations/azure_sql_database/)
Azure SQL Database metrics documentation: [https://docs.microsoft.com/en-us/azure/sql-database/saas-dbpertenant-log-analytics](https://docs.microsoft.com/en-us/azure/sql-database/saas-dbpertenant-log-analytics)

View File

@ -0,0 +1,62 @@
# Global Terraform
#
# Required input (no default). Interpolated into monitor names and tags, and
# into the default tag filter as "env:<environment>".
variable "environment" {
  type        = "string"
  description = "Architecture Environment"
}
# Global DataDog
#
# Settings shared by every monitor of this module.

# Seconds; the monitors pass this value to both evaluation_delay and
# new_host_delay.
variable "delay" {
  description = "Delay in seconds for the metric evaluation"
  default     = 600
}

# Required input (no default): notification body attached to each monitor.
variable "message" {
  description = "Message sent when an alert is triggered"
}

# Compared against the string "true" when building the tag filter; any other
# value switches the filter to filter_tags_custom.
variable "filter_tags_use_defaults" {
  description = "Use default filter tags convention"
  default     = "true"
}

# Only read when filter_tags_use_defaults is not "true".
variable "filter_tags_custom" {
  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
  default     = "*"
}
# Azure SQL Database specific
#
# Per-monitor thresholds. Each value is interpolated into the matching
# monitor's query and/or thresholds block.

# --- CPU -------------------------------------------------------------------

# Empty by default, i.e. no CPU warning level is configured out of the box.
variable "cpu_threshold_warning" {
  description = "CPU usage in percent (warning threshold)"
  default     = ""
}

variable "cpu_threshold_critical" {
  description = "CPU usage in percent (critical threshold)"
  default     = "90"
}

# --- Disk space ------------------------------------------------------------

variable "diskspace_threshold_warning" {
  description = "Disk space used in percent (warning threshold)"
  default     = "80"
}

variable "diskspace_threshold_critical" {
  description = "Disk space used in percent (critical threshold)"
  default     = "90"
}

# --- DTU -------------------------------------------------------------------

variable "dtu_threshold_warning" {
  description = "Amount of DTU used (warning threshold)"
  default     = "85"
}

variable "dtu_threshold_critical" {
  description = "Amount of DTU used (critical threshold)"
  default     = "90"
}

# --- Deadlocks -------------------------------------------------------------

variable "deadlock_threshold_critical" {
  description = "Amount of Deadlocks (critical threshold)"
  default     = "1"
}

View File

@ -0,0 +1,129 @@
# Renders the tag filter that every monitor query embeds between the braces
# following the metric name.
#
# - filter_tags_use_defaults == "true": the module's default convention,
#   "dd_monitoring:enabled,dd_azure_sqldatabase:enabled,env:<environment>".
# - anything else: the caller-supplied filter_tags_custom, used verbatim.
data "template_file" "filter" {
  # "$$" escapes the interpolation so the template engine (not Terraform)
  # substitutes the "filter" variable declared below.
  template = "$${filter}"

  vars {
    filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_sqldatabase:enabled,env:%s", var.environment) : var.filter_tags_custom}"
  }
}
# CPU monitor: alerts when the 15-minute average of cpu_percent, grouped by
# database name and resource group, exceeds cpu_threshold_critical.
#
# NOTE(review): only a critical threshold is set here; var.cpu_threshold_warning
# is not referenced by this resource.
resource "datadog_monitor" "sql-database_cpu_90_15min" {
  name    = "[${var.environment}] SQL Database CPU high > ${var.cpu_threshold_critical}% on {{name}}"
  type    = "metric alert"
  message = "${var.message}"

  # Heredoc body and terminator are kept at column 0 so the query text is
  # unchanged.
  query = <<EOF
avg(last_15m): (
avg:azure.sql_servers_databases.cpu_percent{${data.template_file.filter.rendered}} by {name,resource_group}
) > ${var.cpu_threshold_critical}
EOF

  thresholds {
    critical = "${var.cpu_threshold_critical}"
  }

  # Evaluation timing
  evaluation_delay    = "${var.delay}"
  new_host_delay      = "${var.delay}"
  require_full_window = true

  # Missing-data handling: notify when no data is received
  notify_no_data    = true
  no_data_timeframe = 20

  # Notification behaviour
  renotify_interval = 0
  notify_audit      = false
  timeout_h         = 0
  include_tags      = true
  locked            = false

  tags = ["env:${var.environment}", "resource:sqldatabase", "team:azure", "provider:azure"]
}
# Disk space monitor: alerts when the 15-minute average of storage_percent,
# grouped by database name and resource group, exceeds
# diskspace_threshold_critical (warning level at diskspace_threshold_warning).
#
# The thresholds are "disk space used" percentages and the query triggers when
# the metric goes ABOVE them, so the monitor name states "disk space used > X%"
# rather than "free space < X%" (which described the opposite condition).
# The resource identifier is unchanged so existing state/references still match.
resource "datadog_monitor" "sql-database_free_space_low" {
  name    = "[${var.environment}] SQL Database disk space used > ${var.diskspace_threshold_critical}% on {{name}}"
  message = "${var.message}"
  type    = "metric alert"

  # Heredoc body and terminator are kept at column 0 so the query text is
  # unchanged.
  query = <<EOF
avg(last_15m): (
avg:azure.sql_servers_databases.storage_percent{${data.template_file.filter.rendered}} by {name,resource_group}
) > ${var.diskspace_threshold_critical}
EOF

  thresholds {
    warning  = "${var.diskspace_threshold_warning}"
    critical = "${var.diskspace_threshold_critical}"
  }

  # Evaluation timing
  evaluation_delay    = "${var.delay}"
  new_host_delay      = "${var.delay}"
  require_full_window = true

  # Missing-data handling: notify when no data is received
  notify_no_data    = true
  no_data_timeframe = 20

  # Notification behaviour
  renotify_interval = 0
  notify_audit      = false
  timeout_h         = 0
  include_tags      = true
  locked            = false

  tags = ["env:${var.environment}", "resource:sqldatabase", "team:azure", "provider:azure"]
}
# DTU monitor: alerts when the 15-minute average of dtu_consumption_percent,
# grouped by database name and resource group, exceeds dtu_threshold_critical
# (warning level at dtu_threshold_warning).
resource "datadog_monitor" "sql-database_dtu_consumption_high" {
  name    = "[${var.environment}] SQL Database DTU Consumption on {{name}} > ${var.dtu_threshold_critical}"
  message = "${var.message}"
  type    = "metric alert"

  # The metric is prefixed with the "avg:" space aggregator, as the Datadog
  # monitor query grammar requires one and the sibling monitors already use it;
  # the query previously started directly with the metric name.
  # Heredoc body and terminator are kept at column 0.
  query = <<EOF
avg(last_15m): (
avg:azure.sql_servers_databases.dtu_consumption_percent{${data.template_file.filter.rendered}} by {name,resource_group}
) > ${var.dtu_threshold_critical}
EOF

  thresholds {
    warning  = "${var.dtu_threshold_warning}"
    critical = "${var.dtu_threshold_critical}"
  }

  # Evaluation timing
  evaluation_delay    = "${var.delay}"
  new_host_delay      = "${var.delay}"
  require_full_window = true

  # Missing-data handling: notify when no data is received
  notify_no_data    = true
  no_data_timeframe = 20

  # Notification behaviour
  renotify_interval = 0
  notify_audit      = false
  timeout_h         = 0
  include_tags      = true
  locked            = false

  tags = ["env:${var.environment}", "resource:sqldatabase", "team:azure", "provider:azure"]
}
# Deadlock monitor: alerts when the deadlock count summed over the last
# 5 minutes, grouped by database name and resource group, exceeds
# deadlock_threshold_critical.
resource "datadog_monitor" "sql-database_deadlocks_count" {
  name    = "[${var.environment}] SQL Database Deadlocks too high on {{name}}"
  type    = "metric alert"
  message = "${var.message}"

  # Heredoc body and terminator are kept at column 0 so the query text is
  # unchanged.
  query = <<EOF
sum(last_5m): (
avg:azure.sql_servers_databases.deadlock{${data.template_file.filter.rendered}} by {name,resource_group}.as_count()
) > ${var.deadlock_threshold_critical}
EOF

  thresholds {
    critical = "${var.deadlock_threshold_critical}"
  }

  # Evaluation timing
  evaluation_delay    = "${var.delay}"
  new_host_delay      = "${var.delay}"
  require_full_window = true

  # Missing-data handling: unlike the other monitors in this file, no-data
  # notifications are disabled here.
  notify_no_data    = false
  no_data_timeframe = 20

  # Notification behaviour
  renotify_interval = 0
  notify_audit      = false
  timeout_h         = 0
  include_tags      = true
  locked            = false

  tags = ["env:${var.environment}", "resource:sqldatabase", "team:azure", "provider:azure"]
}