From 012d16b77a7dddef1b66176295fe075ef516c01d Mon Sep 17 00:00:00 2001
From: Laurent Piroelle <laurent.piroelle@fr.clara.net>
Date: Thu, 23 Nov 2017 17:52:01 +0100
Subject: [PATCH] MON-74 Normalize monitors

---
 cloud/azure/app-services/README.md            |  54 +--------
 cloud/azure/app-services/inputs.tf            | 113 +-----------------
 .../app-services/monitors-app_services.tf     |  63 +++++-----
 3 files changed, 40 insertions(+), 190 deletions(-)
diff --git a/cloud/azure/app-services/README.md b/cloud/azure/app-services/README.md
index 90f5882..e56fac2 100644
--- a/cloud/azure/app-services/README.md
+++ b/cloud/azure/app-services/README.md
@@ -8,10 +8,8 @@ How to use this module
 module "datadog-monitors-azure-app-services" {
   source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/app-services?ref={revision}"
 
-  message = "${module.datadog-message-alerting.alerting-message}"
-
+  message     = "${module.datadog-message-alerting.alerting-message}"
   environment = "${var.environment}"
-  client_name = "${var.client_name}"
 }
 ```
 
@@ -28,65 +26,23 @@ Creates a DataDog monitors with the following checks :
 Inputs
 ------
 
-| Name | Description | Type | Default | Required |                                                                     DESKTOP-0PBDRFR:  ~
-|------|-------------|:----:|:-----:|:-----:|                                                                          →
-| client_name | Client Name | string | - | yes |
+| Name | Description | Type | Default | Required |
+|------|-------------|:----:|:-----:|:-----:|
 | delay | Delay in seconds for the metric evaluation | string | `600` | no |
 | environment | Architecture environment | string | - | yes |
-| http_2xx_status_rate_last_time_window_code | Query time window code, can be: 1h|4h|1d|2d|1w|1m|3m... to write last_#
-m (1, 5, 10, 15, or 30), last_#h (1, 2, or 4), or last_#d (1 or 2) | string | `5m` | no |
+| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
+| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
 | http_2xx_status_rate_limit |  | string | `30` | no |
-| http_2xx_status_rate_require_full_window | A boolean indicating whether this monitor needs a full window of data bef
-ore it's evaluated. We highly recommend you set this to False for sparse metrics, otherwise some evaluations will be s
-kipped. Default: True for 'on average', 'at all times' and 'in total' aggregation. False otherwise. | string | `true`
-| no |
-| http_2xx_status_rate_tags | A list of tags to associate with your monitor. This can help you categorize and filter m
-onitors in the manage monitors page of the UI. Note: it's not currently possible to filter by these tags when querying
- via the API | string | `<list>` | no |
 | http_2xx_status_rate_threshold_critical | Alerting threshold (percentage) | string | `0.9` | no |
 | http_2xx_status_rate_threshold_warning | Warning threshold (percentage) | string | `0.95` | no |
-| http_2xx_status_rate_timeout_h | The number of hours of the monitor not reporting data before it will automatically
-resolve from a triggered state. Defaults to false. | string | `false` | no |
-| http_404_errors_count_rate_last_time_window_code | Query time window code, can be: 1h|4h|1d|2d|1w|1m|3m... to write
-last_#m (1, 5, 10, 15, or 30), last_#h (1, 2, or 4), or last_#d (1 or 2) | string | `5m` | no |
 | http_404_errors_count_rate_limit |  | string | `30` | no |
-| http_404_errors_count_rate_require_full_window | A boolean indicating whether this monitor needs a full window of da
-ta before it's evaluated. We highly recommend you set this to False for sparse metrics, otherwise some evaluations wil
-l be skipped. Default: True for 'on average', 'at all times' and 'in total' aggregation. False otherwise. | string | `
-true` | no |
-| http_404_errors_count_rate_tags | A list of tags to associate with your monitor. This can help you categorize and fi
-lter monitors in the manage monitors page of the UI. Note: it's not currently possible to filter by these tags when qu
-erying via the API | string | `<list>` | no |
 | http_404_errors_count_rate_threshold_critical | Alerting threshold (number of requests) | string | `30` | no |
 | http_404_errors_count_rate_threshold_warning | Warning threshold (number of requests) | string | `10` | no |
-| http_404_errors_count_rate_timeout_h | The number of hours of the monitor not reporting data before it will automati
-cally resolve from a triggered state. Defaults to false. | string | `false` | no |
-| memory_usage_last_time_window_code | Query time window code, can be: 1h|4h|1d|2d|1w|1m|3m... to write last_#m (1, 5,
- 10, 15, or 30), last_#h (1, 2, or 4), or last_#d (1 or 2) | string | `5m` | no |
-| memory_usage_require_full_window | A boolean indicating whether this monitor needs a full window of data before it's
- evaluated. We highly recommend you set this to False for sparse metrics, otherwise some evaluations will be skipped.
-Default: True for 'on average', 'at all times' and 'in total' aggregation. False otherwise. | string | `false` | no |
-| memory_usage_tags | A list of tags to associate with your monitor. This can help you categorize and filter monitors
-in the manage monitors page of the UI. Note: it's not currently possible to filter by these tags when querying via the
- API | string | `<list>` | no |
 | memory_usage_threshold_critical | Alerting threshold in Mib | string | `52430000` | no |
 | memory_usage_threshold_warning | Warning threshold in MiB | string | `33550000` | no |
-| memory_usage_timeout_h | The number of hours of the monitor not reporting data before it will automatically resolve
-from a triggered state. Defaults to false. | string | `false` | no |
 | message | Message sent when a monitor is triggered | string | - | yes |
-| response_time_last_time_window_code | Query time window code, can be: 1h|4h|1d|2d|1w|1m|3m... to write last_#m (1, 5
-, 10, 15, or 30), last_#h (1, 2, or 4), or last_#d (1 or 2) | string | `1h` | no |
-| response_time_require_full_window | A boolean indicating whether this monitor needs a full window of data before it'
-s evaluated. We highly recommend you set this to False for sparse metrics, otherwise some evaluations will be skipped.
- Default: True for 'on average', 'at all times' and 'in total' aggregation. False otherwise. | string | `false` | no |
-| response_time_tags | A list of tags to associate with your monitor. This can help you categorize and filter monitors
- in the manage monitors page of the UI. Note: it's not currently possible to filter by these tags when querying via th
-e API | string | `<list>` | no |
 | response_time_threshold_critical | Alerting threshold in seconds | string | `0.8` | no |
 | response_time_threshold_warning | Warning threshold in seconds | string | `0.4` | no |
-| response_time_timeout_h | The number of hours of the monitor not reporting data before it will automatically resolve
- from a triggered state. Defaults to false. | string | `false` | no |
-| use_filter_tags | Filter the data with service tags if true | string | `true` | no |
 
 Related documentation
 ---------------------
diff --git a/cloud/azure/app-services/inputs.tf b/cloud/azure/app-services/inputs.tf
index 5f0f2b0..c4bc451 100644
--- a/cloud/azure/app-services/inputs.tf
+++ b/cloud/azure/app-services/inputs.tf
@@ -3,14 +3,14 @@ variable "environment" {
   type        = "string"
 }
 
-variable "client_name" {
-  description = "Client Name"
-  type        = "string"
+variable "filter_tags_use_defaults" {
+  description = "Use default filter tags convention"
+  default     = "true"
 }
 
-variable "use_filter_tags" {
-  description = "Filter the data with service tags if true"
-  default     = "true"
+variable "filter_tags_custom" {
+  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
+  default     = "*"
 }
 
 variable "message" {
@@ -36,31 +36,6 @@ variable "response_time_threshold_warning" {
   description = "Warning threshold in seconds"
 }
 
-variable "response_time_last_time_window_code" {
-  default     = "1h"
-  description = "Query time window code, can be: 1h|4h|1d|2d|1w|1m|3m... to write last_#m (1, 5, 10, 15, or 30), last_#h (1, 2, or 4), or last_#d (1 or 2)"
-}
-
-variable "response_time_require_full_window" {
-  default     = false
-  description = "A boolean indicating whether this monitor needs a full window of data before it's evaluated. We highly recommend you set this to False for sparse metrics, otherwise some evaluations will be skipped. Default: True for 'on average', 'at all times' and 'in total' aggregation. False otherwise."
-}
-
-variable "response_time_tags" {
-  default     = []
-  description = "A list of tags to associate with your monitor. This can help you categorize and filter monitors in the manage monitors page of the UI. Note: it's not currently possible to filter by these tags when querying via the API"
-}
-
-variable "response_time_timeout_h" {
-  default     = false
-  description = "The number of hours of the monitor not reporting data before it will automatically resolve from a triggered state. Defaults to false."
-}
-
-# variable "response_time_notify_no_data" {
-#   default = true
-#   description = " boolean indicating whether this monitor will notify when data stops reporting. Defaults to true."
-# }
-
 ###################################
 ###   MEMORY USAGE VARIABLES   ###
 ###################################
@@ -75,31 +50,6 @@ variable "memory_usage_threshold_warning" {
   description = "Warning threshold in MiB"
 }
 
-variable "memory_usage_last_time_window_code" {
-  default     = "5m"
-  description = "Query time window code, can be: 1h|4h|1d|2d|1w|1m|3m... to write last_#m (1, 5, 10, 15, or 30), last_#h (1, 2, or 4), or last_#d (1 or 2)"
-}
-
-variable "memory_usage_require_full_window" {
-  default     = false
-  description = "A boolean indicating whether this monitor needs a full window of data before it's evaluated. We highly recommend you set this to False for sparse metrics, otherwise some evaluations will be skipped. Default: True for 'on average', 'at all times' and 'in total' aggregation. False otherwise."
-}
-
-variable "memory_usage_tags" {
-  default     = []
-  description = "A list of tags to associate with your monitor. This can help you categorize and filter monitors in the manage monitors page of the UI. Note: it's not currently possible to filter by these tags when querying via the API"
-}
-
-variable "memory_usage_timeout_h" {
-  default     = false
-  description = "The number of hours of the monitor not reporting data before it will automatically resolve from a triggered state. Defaults to false."
-}
-
-# variable "memory_usage_notify_no_data" {
-#   default = true
-#   description = " boolean indicating whether this monitor will notify when data stops reporting. Defaults to true."
-# }
-
 #################################
 ###   HTTP 404 status pages   ###
 #################################
@@ -118,31 +68,6 @@ variable "http_404_errors_count_rate_threshold_warning" {
   description = "Warning threshold (number of requests)"
 }
 
-variable "http_404_errors_count_rate_last_time_window_code" {
-  default     = "5m"
-  description = "Query time window code, can be: 1h|4h|1d|2d|1w|1m|3m... to write last_#m (1, 5, 10, 15, or 30), last_#h (1, 2, or 4), or last_#d (1 or 2)"
-}
-
-variable "http_404_errors_count_rate_require_full_window" {
-  default     = true
-  description = "A boolean indicating whether this monitor needs a full window of data before it's evaluated. We highly recommend you set this to False for sparse metrics, otherwise some evaluations will be skipped. Default: True for 'on average', 'at all times' and 'in total' aggregation. False otherwise."
-}
-
-variable "http_404_errors_count_rate_tags" {
-  default     = []
-  description = "A list of tags to associate with your monitor. This can help you categorize and filter monitors in the manage monitors page of the UI. Note: it's not currently possible to filter by these tags when querying via the API"
-}
-
-variable "http_404_errors_count_rate_timeout_h" {
-  default     = false
-  description = "The number of hours of the monitor not reporting data before it will automatically resolve from a triggered state. Defaults to false."
-}
-
-# variable "http_404_errors_count_rate_notify_no_data" {
-#   default = true
-#   description = " boolean indicating whether this monitor will notify when data stops reporting. Defaults to true."
-# }
-
 #################################
-###   HTTP 202 status pages   ###
+###   HTTP 2xx status pages   ###
 #################################
@@ -160,29 +85,3 @@ variable "http_2xx_status_rate_threshold_warning" {
   default     = 0.95
   description = "Warning threshold (percentage)"
 }
-
-variable "http_2xx_status_rate_last_time_window_code" {
-  default     = "5m"
-  description = "Query time window code, can be: 1h|4h|1d|2d|1w|1m|3m... to write last_#m (1, 5, 10, 15, or 30), last_#h (1, 2, or 4), or last_#d (1 or 2)"
-}
-
-variable "http_2xx_status_rate_require_full_window" {
-  default     = true
-  description = "A boolean indicating whether this monitor needs a full window of data before it's evaluated. We highly recommend you set this to False for sparse metrics, otherwise some evaluations will be skipped. Default: True for 'on average', 'at all times' and 'in total' aggregation. False otherwise."
-}
-
-variable "http_2xx_status_rate_tags" {
-  default     = []
-  description = "A list of tags to associate with your monitor. This can help you categorize and filter monitors in the manage monitors page of the UI. Note: it's not currently possible to filter by these tags when querying via the API"
-}
-
-variable "http_2xx_status_rate_timeout_h" {
-  default     = false
-  description = "The number of hours of the monitor not reporting data before it will automatically resolve from a triggered state. Defaults to false."
-}
-
-# variable "http_2xx_status_rate_notify_no_data" {
-#   default = true
-#   description = " boolean indicating whether this monitor will notify when data stops reporting. Defaults to true."
-# }
-
diff --git a/cloud/azure/app-services/monitors-app_services.tf b/cloud/azure/app-services/monitors-app_services.tf
index 437b7fb..1cff1af 100644
--- a/cloud/azure/app-services/monitors-app_services.tf
+++ b/cloud/azure/app-services/monitors-app_services.tf
@@ -2,18 +2,18 @@ data "template_file" "filter" {
   template = "$${filter}"
 
   vars {
-    filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_appservices:enabled,env:%s", var.environment) : "*"}"
+    filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_appservices:enabled,env:%s", var.environment) : var.filter_tags_custom}" # default tag convention, else the caller-supplied filter
   }
 }
 
 # Monitoring App Services response time
 resource "datadog_monitor" "appservices_response_time" {
-  name    = "[${var.environment}] App Services response time {{value}}s is above ${var.response_time_threshold_critical}s"
-  type    = "query alert"
+  name    = "[${var.environment}] App Services response time > ${var.response_time_threshold_critical}s on {{name}}"
+  type    = "metric alert"
   message = "${var.message}"
 
   query = <<EOF
-    avg(last_${var.response_time_last_time_window_code}): (
+    avg(last_5m): (
       avg:azure.app_services.average_response_time{${data.template_file.filter.rendered}}
     ) >= ${var.response_time_threshold_critical}
   EOF
@@ -26,24 +26,23 @@ resource "datadog_monitor" "appservices_response_time" {
     critical = "${var.response_time_threshold_critical}"
   }
 
-  notify_no_data      = true                                       # Will notify when no data is received
+  notify_no_data      = true  # Will notify when no data is received
   renotify_interval   = 0
-  require_full_window = "${var.response_time_require_full_window}"
+  require_full_window = true
+  timeout_h           = 0
+  include_tags        = true
 
-  timeout_h    = "${var.response_time_timeout_h}"
-  include_tags = true
-
-  tags = "${var.response_time_tags}"
+  tags = ["env:${var.environment}", "resource:appservices", "team:azure", "provider:azure"]
 }
 
 # Monitoring App Services memory usage
 resource "datadog_monitor" "appservices_memory_usage_count" {
-  name    = "[${var.environment}] App Services memory usage {{value}} bytes is above ${ceil(var.memory_usage_threshold_critical/1000000)}MiB"
-  type    = "query alert"
+  name    = "[${var.environment}] App Services memory usage > ${ceil(var.memory_usage_threshold_critical/1000000)}MiB on {{name}}"
+  type    = "metric alert"
   message = "${var.message}"
 
   query = <<EOF
-    avg(last_${var.memory_usage_last_time_window_code}): (
+    avg(last_5m): (
       avg:azure.app_services.memory_working_set{${data.template_file.filter.rendered}}
     ) >= ${var.memory_usage_threshold_critical}
   EOF
@@ -58,22 +57,21 @@ resource "datadog_monitor" "appservices_memory_usage_count" {
 
   notify_no_data      = true                                      # Will notify when no data is received
   renotify_interval   = 0
-  require_full_window = "${var.memory_usage_require_full_window}"
+  require_full_window = true
+  timeout_h           = 0
+  include_tags        = true
 
-  timeout_h    = "${var.memory_usage_timeout_h}"
-  include_tags = true
-
-  tags = "${var.memory_usage_tags}"
+  tags = ["env:${var.environment}", "resource:appservices", "team:azure", "provider:azure"]
 }
 
 # Monitoring App Services 404 errors rate
 resource "datadog_monitor" "appservices_http_404_errors_count" {
-  name    = "[${var.environment}] App Services {{value}} HTTP errors > ${var.http_404_errors_count_rate_limit} limit"
-  type    = "query alert"
+  name    = "[${var.environment}] App Services HTTP errors > ${var.http_404_errors_count_rate_limit} limit on {{name}}"
+  type    = "metric alert"
   message = "${var.message}"
 
   query = <<EOF
-    max(last_${var.http_404_errors_count_rate_last_time_window_code}): (
+    max(last_5m): (
       per_minute(avg:azure.app_services.http404{${data.template_file.filter.rendered}}.as_rate())
     ) > ${var.http_404_errors_count_rate_threshold_critical}
   EOF
@@ -89,21 +87,20 @@ resource "datadog_monitor" "appservices_http_404_errors_count" {
   notify_no_data      = false # Will NOT notify when no data is received
   renotify_interval   = 0
   require_full_window = true
+  timeout_h           = 0
+  include_tags        = true
 
-  timeout_h    = "${var.http_404_errors_count_rate_timeout_h}"
-  include_tags = true
-
-  tags = "${var.http_404_errors_count_rate_tags}"
+  tags = ["env:${var.environment}", "resource:appservices", "team:azure", "provider:azure"]
 }
 
 # Monitoring App Services HTTP 2xx status pages rate
 resource "datadog_monitor" "appservices_http_2xx_status_rate" {
-  name    = "[${var.environment}] App Services {{value}} Too much non 2xx HTTP status in response to the requests"
-  type    = "query alert"
+  name    = "[${var.environment}] App Services Too much non 2xx HTTP status in response to the requests on {{name}}"
+  type    = "metric alert"
   message = "${var.message}"
 
   query = <<EOF
-    avg(last_${var.http_2xx_status_rate_last_time_window_code}): (
+    avg(last_5m): (
       avg:azure.app_services.http2xx{${data.template_file.filter.rendered}}.as_count() /
-        avg:azure.app_services.http2xx{${data.template_file.filter.rendered}}.as_count()
+        avg:azure.app_services.requests{${data.template_file.filter.rendered}}.as_count()
     ) < ${var.http_2xx_status_rate_threshold_critical}
@@ -117,13 +114,11 @@ resource "datadog_monitor" "appservices_http_2xx_status_rate" {
     critical = "${var.http_2xx_status_rate_threshold_critical}"
   }
 
-  # Will notify when no data is received
-  notify_no_data      = true
+  notify_no_data      = true  # Will notify when no data is received
   renotify_interval   = 0
   require_full_window = true
+  timeout_h           = 0
+  include_tags        = true
 
-  timeout_h    = "${var.http_2xx_status_rate_timeout_h}"
-  include_tags = true
-
-  tags = "${var.http_2xx_status_rate_tags}"
+  tags = ["env:${var.environment}", "resource:appservices", "team:azure", "provider:azure"]
 }