Merged in MON-73-azure-managed-services-monitors (pull request #23)

MON-73 Azure managed services monitors

Approved-by: Laurent Piroelle <laurent.piroelle@fr.clara.net>
Approved-by: Jérôme Respaut <shr3ps@gmail.com>
Approved-by: Alexandre Gaillet <alexandre.gaillet@fr.clara.net>
This commit is contained in:
Laurent Piroelle 2017-12-20 16:39:21 +00:00 committed by Quentin Manfroi
commit a3269d9557
26 changed files with 2842 additions and 11 deletions

118
cloud/azure/README.md Normal file
View File

@ -0,0 +1,118 @@
Azure monitors
==============
How to use this module
----------------------
```
module "datadog-monitors-azure" {
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure?ref={revision}"
message = "${module.datadog-message-alerting.alerting-message}"
environment = "${var.environment}"
}
```
Purpose
-------
Creates a set of Azure DataDog monitors for the following components :
* Azure App Services monitors
* Azure SQL monitors
* Azure Redis monitors
* Azure Event Hub monitors
* Azure Stream Analytics monitors
* Azure Storage monitors
* Azure IOT Hub monitors
* Azure API Management monitors
Inputs
------
| Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:|
| apimanagement_failed_requests_threshold_critical | Maximum acceptable percent of failed requests | string | `5` | no |
| apimanagement_other_requests_threshold_critical | Maximum acceptable percent of other requests | string | `5` | no |
| apimanagement_successful_requests_threshold_critical | Minimum acceptable percent of successful requests | string | `90` | no |
| apimanagement_unauthorized_requests_threshold_critical | Maximum acceptable percent of unauthorized requests | string | `5` | no |
| appservices_http_2xx_requests_threshold_critical | Minimum critical acceptable percent of 2xx requests | string | `90` | no |
| appservices_http_2xx_requests_threshold_warning | Minimum warning acceptable percent of 2xx requests | string | `95` | no |
| appservices_http_4xx_requests_threshold_critical | Maximum critical acceptable percent of 4xx errors | string | `30` | no |
| appservices_http_4xx_requests_threshold_warning | Maximum warning acceptable percent of 4xx errors | string | `15` | no |
| appservices_http_5xx_requests_threshold_critical | Maximum critical acceptable percent of 5xx errors | string | `20` | no |
| appservices_http_5xx_requests_threshold_warning | Maximum warning acceptable percent of 5xx errors | string | `10` | no |
| appservices_memory_usage_threshold_critical | Alerting threshold in bytes (~50 MiB) | string | `52430000` | no |
| appservices_memory_usage_threshold_warning | Warning threshold in bytes (~32 MiB) | string | `33550000` | no |
| appservices_response_time_threshold_critical | Alerting threshold in seconds | string | `0.8` | no |
| appservices_response_time_threshold_warning | Warning threshold in seconds | string | `0.4` | no |
| delay | Delay in seconds for the metric evaluation | string | `600` | no |
| environment | Architecture environment | string | - | yes |
| eventhub_errors_rate_thresold_critical | Errors ratio (percentage) to trigger the critical alert | string | `3` | no |
| eventhub_errors_rate_thresold_warning | Errors ratio (percentage) to trigger a warning alert | string | `1` | no |
| eventhub_failed_requests_rate_thresold_critical | Failed requests ratio (percentage) to trigger the critical alert | string | `3` | no |
| eventhub_failed_requests_rate_thresold_warning | Failed requests ratio (percentage) to trigger a warning alert | string | `1` | no |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| iothub_dropped_d2c_telemetry_egress_threshold_critical | D2C Telemetry Dropped limit (critical threshold) | string | `1000` | no |
| iothub_dropped_d2c_telemetry_egress_threshold_warning | D2C Telemetry Dropped limit (warning threshold) | string | `500` | no |
| iothub_failed_c2d_methods_rate_threshold_critical | C2D Methods Failed rate limit (critical threshold) | string | `10` | no |
| iothub_failed_c2d_methods_rate_threshold_warning | C2D Methods Failed rate limit (warning threshold) | string | `0` | no |
| iothub_failed_c2d_twin_read_rate_threshold_critical | C2D Twin Read Failed rate limit (critical threshold) | string | `10` | no |
| iothub_failed_c2d_twin_read_rate_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `0` | no |
| iothub_failed_c2d_twin_update_rate_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `10` | no |
| iothub_failed_c2d_twin_update_rate_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `0` | no |
| iothub_failed_d2c_twin_read_rate_threshold_critical | D2C Twin Read Failed rate limit (critical threshold) | string | `10` | no |
| iothub_failed_d2c_twin_read_rate_threshold_warning | D2C Twin Read Failed rate limit (warning threshold) | string | `0` | no |
| iothub_failed_d2c_twin_update_rate_threshold_critical | D2C Twin Update Failed rate limit (critical threshold) | string | `10` | no |
| iothub_failed_d2c_twin_update_rate_threshold_warning | D2C Twin Update Failed rate limit (warning threshold) | string | `0` | no |
| iothub_failed_jobs_rate_threshold_critical | Jobs Failed rate limit (critical threshold) | string | `10` | no |
| iothub_failed_jobs_rate_threshold_warning | Jobs Failed rate limit (warning threshold) | string | `0` | no |
| iothub_failed_listjobs_rate_threshold_critical | ListJobs Failed rate limit (critical threshold) | string | `10` | no |
| iothub_failed_listjobs_rate_threshold_warning | ListJobs Failed rate limit (warning threshold) | string | `0` | no |
| iothub_failed_queryjobs_rate_threshold_critical | QueryJobs Failed rate limit (critical threshold) | string | `10` | no |
| iothub_failed_queryjobs_rate_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `0` | no |
| iothub_fallback_d2c_telemetry_egress_threshold_critical | D2C Telemetry Fallback limit (critical threshold) | string | `1000` | no |
| iothub_fallback_d2c_telemetry_egress_threshold_warning | D2C Telemetry Fallback limit (warning threshold) | string | `500` | no |
| iothub_invalid_d2c_telemetry_egress_threshold_critical | D2C Telemetry Invalid limit (critical threshold) | string | `1000` | no |
| iothub_invalid_d2c_telemetry_egress_threshold_warning | D2C Telemetry Invalid limit (warning threshold) | string | `500` | no |
| iothub_orphaned_d2c_telemetry_egress_threshold_critical | D2C Telemetry Orphaned limit (critical threshold) | string | `1000` | no |
| iothub_orphaned_d2c_telemetry_egress_threshold_warning | D2C Telemetry Orphaned limit (warning threshold) | string | `500` | no |
| message | Message sent when a monitor is triggered | string | - | yes |
| non_taggable_filter_tags | Tags used for filtering for components without tag support | string | `*` | no |
| redis_evictedkeys_limit_threshold_critical | Evicted keys limit (critical threshold) | string | `100` | no |
| redis_evictedkeys_limit_threshold_warning | Evicted keys limit (warning threshold) | string | `0` | no |
| redis_percent_processor_time_threshold_critical | Processor time percent (critical threshold) | string | `80` | no |
| redis_percent_processor_time_threshold_warning | Processor time percent (warning threshold) | string | `60` | no |
| redis_server_load_rate_threshold_critical | Server CPU load rate (critical threshold) | string | `90` | no |
| redis_server_load_rate_threshold_warning | Server CPU load rate (warning threshold) | string | `70` | no |
| sqldatabase_cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no |
| sqldatabase_cpu_threshold_warning | CPU usage in percent (warning threshold) | string | `80` | no |
| sqldatabase_deadlock_threshold_critical | Amount of Deadlocks (critical threshold) | string | `1` | no |
| sqldatabase_diskspace_threshold_critical | Disk space used in percent (critical threshold) | string | `90` | no |
| sqldatabase_diskspace_threshold_warning | Disk space used in percent (warning threshold) | string | `80` | no |
| sqldatabase_dtu_threshold_critical | Amount of DTU used (critical threshold) | string | `90` | no |
| sqldatabase_dtu_threshold_warning | Amount of DTU used (warning threshold) | string | `85` | no |
| storage_authorization_error_requests_threshold_critical | Maximum acceptable percent of authorization error requests for a storage | string | `15` | no |
| storage_availability_threshold_critical | Minimum acceptable percent of availability for a storage | string | `90` | no |
| storage_client_other_error_requests_threshold_critical | Maximum acceptable percent of client other error requests for a storage | string | `15` | no |
| storage_latency_threshold_critical | Maximum acceptable end to end latency (ms) for a storage | string | `1000` | no |
| storage_network_error_requests_threshold_critical | Maximum acceptable percent of network error requests for a storage | string | `5` | no |
| storage_server_other_error_requests_threshold_critical | Maximum acceptable percent of server other error requests for a storage | string | `10` | no |
| storage_successful_requests_threshold_critical | Minimum acceptable percent of successful requests for a storage | string | `90` | no |
| storage_throttling_error_requests_threshold_critical | Maximum acceptable percent of throttling error requests for a storage | string | `10` | no |
| storage_timeout_error_requests_threshold_critical | Maximum acceptable percent of timeout error requests for a storage | string | `5` | no |
| streamanalytics_conversion_errors_threshold_critical | Conversion errors limit (critical threshold) | string | `10` | no |
| streamanalytics_conversion_errors_threshold_warning | Conversion errors limit (warning threshold) | string | `0` | no |
| streamanalytics_failed_function_requests_threshold_critical | Failed Function Request rate limit (critical threshold) | string | `10` | no |
| streamanalytics_function_requests_threshold_warning | Failed Function Request rate limit (warning threshold) | string | `0` | no |
| streamanalytics_runtime_errors_threshold_critical | Runtime errors limit (critical threshold) | string | `10` | no |
| streamanalytics_runtime_errors_threshold_warning | Runtime errors limit (warning threshold) | string | `0` | no |
| streamanalytics_su_utilization_threshold_critical | Streaming Unit utilization rate limit (critical threshold) | string | `80` | no |
| streamanalytics_su_utilization_threshold_warning | Streaming Unit utilization rate limit (warning threshold) | string | `60` | no |
Related documentation
---------------------
DataDog documentation: [https://docs.datadoghq.com/integrations/azure/](https://docs.datadoghq.com/integrations/azure/)
Azure metrics documentation: [https://docs.microsoft.com/en-us/azure/monitoring-and-diagnostics/monitoring-overview-metrics](https://docs.microsoft.com/en-us/azure/monitoring-and-diagnostics/monitoring-overview-metrics)

View File

@ -0,0 +1,43 @@
Azure API Management Datadog monitors
=====================================
How to use this module
----------------------
```
module "datadog-monitors-azure-apimanagement" {
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/apimanagement?ref={revision}"
message = "${module.datadog-message-alerting.alerting-message}"
environment = "${var.environment}"
}
```
Purpose
-------
Creates Datadog monitors with the following checks :
* Service status
* Failed requests ratio
* Other requests ratio
* Unauthorized requests ratio
* Successful requests ratio
Inputs
------
| Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:|
| delay | Delay in seconds for the metric evaluation | string | `600` | no |
| environment | Architecture environment | string | - | yes |
| failed_requests_threshold_critical | Maximum acceptable percent of failed requests | string | `5` | no |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| message | Message sent when an API Management monitor is triggered | string | - | yes |
| other_requests_threshold_critical | Maximum acceptable percent of other requests | string | `5` | no |
| successful_requests_threshold_critical | Minimum acceptable percent of successful requests | string | `90` | no |
| unauthorized_requests_threshold_critical | Maximum acceptable percent of unauthorized requests | string | `5` | no |
Related documentation
---------------------
Azure API Management metrics documentation: [https://docs.microsoft.com/en-us/azure/api-management/api-management-howto-use-azure-monitor](https://docs.microsoft.com/en-us/azure/api-management/api-management-howto-use-azure-monitor)

View File

@ -0,0 +1,46 @@
# Global Terraform
# Global Terraform
variable "environment" {
  description = "Architecture environment"
  type        = "string"
}

# Global DataDog
variable "message" {
  # Description fixed: it was copy-pasted from the Redis module and wrongly
  # referred to "a Redis monitor".
  description = "Message sent when an API Management monitor is triggered"
}

variable "delay" {
  description = "Delay in seconds for the metric evaluation"
  default     = 600
}

variable "filter_tags_use_defaults" {
  description = "Use default filter tags convention"
  default     = "true"
}

variable "filter_tags_custom" {
  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
  default     = "*"
}

# Azure API Management specific
# The four thresholds below are percentages of total requests over the
# monitor's evaluation window.
variable "failed_requests_threshold_critical" {
  description = "Maximum acceptable percent of failed requests"
  default     = 5
}

variable "other_requests_threshold_critical" {
  description = "Maximum acceptable percent of other requests"
  default     = 5
}

variable "unauthorized_requests_threshold_critical" {
  description = "Maximum acceptable percent of unauthorized requests"
  default     = 5
}

variable "successful_requests_threshold_critical" {
  description = "Minimum acceptable percent of successful requests"
  default     = 90
}

View File

@ -0,0 +1,156 @@
# Builds the Datadog tag filter: either the default convention tags
# (dd_monitoring / dd_azure_apimanagement / env) or the custom filter string.
# NOTE: the interpolation is kept on a single line — Terraform 0.11
# interpolation expressions cannot span multiple lines, and this matches the
# filter blocks of the sibling modules (app-services, eventhub).
data "template_file" "filter" {
  template = "$${filter}"

  vars {
    filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_apimanagement:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
  }
}
# Service availability: alerts when the API Management status metric averages
# below 1 over 5 minutes. notify_no_data=true also catches a silent
# integration outage (no metrics received for no_data_timeframe minutes).
resource "datadog_monitor" "apimgt_status" {
name = "[${var.environment}] API Management status is not ok on {{name}}"
message = "${var.message}"
query = <<EOF
avg(last_5m):avg:azure.apimanagement_service.status{${data.template_file.filter.rendered}} by {resource_group,region,name} < 1
EOF
type = "metric alert"
# Threshold value mirrors the "< 1" comparator in the query, as Datadog requires.
thresholds {
critical = 1
}
notify_no_data = true
# Azure metrics arrive late; delay evaluation to avoid false no-data alerts.
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:apimanagement", "team:azure", "provider:azure"]
}
# Failed-requests ratio: failed_requests / total_requests * 100 over 5
# minutes, critical when above the configured percentage. No-data is not
# alerted: an idle service legitimately produces no requests.
resource "datadog_monitor" "apimgt_failed_requests" {
name = "[${var.environment}] API Management {{name}} too much failed requests"
message = "${var.message}"
query = <<EOF
sum(last_5m): (
avg:azure.apimanagement_service.failed_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.apimanagement_service.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() * 100
) > ${var.failed_requests_threshold_critical}
EOF
thresholds {
critical = "${var.failed_requests_threshold_critical}"
}
type = "metric alert"
notify_no_data = false
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
evaluation_delay = "${var.delay}"
renotify_interval = 0
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:apimanagement", "team:azure", "provider:azure"]
}
# "Other"-requests ratio: other_requests / total_requests * 100 over 5
# minutes, critical when above the configured percentage. Same shape as the
# failed/unauthorized/successful request monitors in this file.
resource "datadog_monitor" "apimgt_other_requests" {
name = "[${var.environment}] API Management {{name}} too much other requests"
message = "${var.message}"
query = <<EOF
sum(last_5m): (
avg:azure.apimanagement_service.other_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.apimanagement_service.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() * 100
) > ${var.other_requests_threshold_critical}
EOF
thresholds {
critical = "${var.other_requests_threshold_critical}"
}
type = "metric alert"
notify_no_data = false
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
evaluation_delay = "${var.delay}"
renotify_interval = 0
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:apimanagement", "team:azure", "provider:azure"]
}
# Unauthorized-requests ratio: unauthorized_requests / total_requests * 100
# over 5 minutes, critical when above the configured percentage.
resource "datadog_monitor" "apimgt_unauthorized_requests" {
name = "[${var.environment}] API Management {{name}} too much unauthorized requests"
message = "${var.message}"
query = <<EOF
sum(last_5m): (
avg:azure.apimanagement_service.unauthorized_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.apimanagement_service.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() * 100
) > ${var.unauthorized_requests_threshold_critical}
EOF
thresholds {
critical = "${var.unauthorized_requests_threshold_critical}"
}
type = "metric alert"
notify_no_data = false
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
evaluation_delay = "${var.delay}"
renotify_interval = 0
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:apimanagement", "team:azure", "provider:azure"]
}
# Successful-requests ratio: successful_requests / total_requests * 100 over
# 5 minutes. Unlike the other request monitors this uses a LOWER bound:
# critical when the success percentage drops BELOW the threshold.
resource "datadog_monitor" "apimgt_successful_requests" {
name = "[${var.environment}] API Management {{name}} successful requests rate too low"
message = "${var.message}"
query = <<EOF
sum(last_5m): (
avg:azure.apimanagement_service.successful_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.apimanagement_service.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() * 100
) < ${var.successful_requests_threshold_critical}
EOF
thresholds {
critical = "${var.successful_requests_threshold_critical}"
}
type = "metric alert"
notify_no_data = false
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
evaluation_delay = "${var.delay}"
renotify_interval = 0
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:apimanagement", "team:azure", "provider:azure"]
}

View File

@ -0,0 +1,50 @@
Azure AppServices (Web, API, Functions) DataDog monitors
========================================================
How to use this module
----------------------
```
module "datadog-monitors-azure-app-services" {
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/app-services?ref={revision}"
message = "${module.datadog-message-alerting.alerting-message}"
environment = "${var.environment}"
}
```
Purpose
-------
Creates DataDog monitors with the following checks:
* Response time
* Memory usage count
* HTTP 5xx requests
* HTTP 4xx requests
* HTTP 2xx requests
Inputs
------
| Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:|
| delay | Delay in seconds for the metric evaluation | string | `600` | no |
| environment | Architecture environment | string | - | yes |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| http_2xx_requests_threshold_critical | Minimum critical acceptable percent of 2xx requests | string | `90` | no |
| http_2xx_requests_threshold_warning | Minimum warning acceptable percent of 2xx requests | string | `95` | no |
| http_4xx_requests_threshold_critical | Maximum critical acceptable percent of 4xx errors | string | `30` | no |
| http_4xx_requests_threshold_warning | Maximum warning acceptable percent of 4xx errors | string | `15` | no |
| http_5xx_requests_threshold_critical | Maximum critical acceptable percent of 5xx errors | string | `20` | no |
| http_5xx_requests_threshold_warning | Maximum warning acceptable percent of 5xx errors | string | `10` | no |
| memory_usage_threshold_critical | Alerting threshold in bytes (~50 MiB) | string | `52430000` | no |
| memory_usage_threshold_warning | Warning threshold in bytes (~32 MiB) | string | `33550000` | no |
| message | Message sent when a monitor is triggered | string | - | yes |
| response_time_threshold_critical | Alerting threshold in seconds | string | `0.8` | no |
| response_time_threshold_warning | Warning threshold in seconds | string | `0.4` | no |
Related documentation
---------------------
DataDog documentation: [https://docs.datadoghq.com/integrations/azure_app_services](https://docs.datadoghq.com/integrations/azure_app_services)

View File

@ -0,0 +1,93 @@
# Global Terraform
variable "environment" {
  description = "Architecture environment"
  type        = "string"
}

# Tag filtering: default dd_* convention tags, or a custom filter string.
variable "filter_tags_use_defaults" {
  description = "Use default filter tags convention"
  default     = "true"
}

variable "filter_tags_custom" {
  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
  default     = "*"
}

# Global DataDog
variable "message" {
  description = "Message sent when a monitor is triggered"
}

variable "delay" {
  description = "Delay in seconds for the metric evaluation"
  default     = 600
}

###################################
###   RESPONSE TIME VARIABLES  ###
###################################

variable "response_time_threshold_critical" {
  description = "Alerting threshold in seconds"
  default     = 0.8
}

variable "response_time_threshold_warning" {
  description = "Warning threshold in seconds"
  default     = 0.4
}

###################################
###   MEMORY USAGE VARIABLES   ###
###################################

# These values are compared against azure.app_services.memory_working_set in
# monitors.tf; they are expressed in bytes (52430000 bytes ~= 50 MiB,
# 33550000 bytes ~= 32 MiB). The previous descriptions incorrectly said
# "MiB", which misleads anyone overriding the defaults.
variable "memory_usage_threshold_critical" {
  description = "Alerting threshold in bytes (~50 MiB)"
  default     = 52430000
}

variable "memory_usage_threshold_warning" {
  description = "Warning threshold in bytes (~32 MiB)"
  default     = 33550000
}

#################################
###   HTTP 5xx status pages  ###
#################################

variable "http_5xx_requests_threshold_critical" {
  description = "Maximum critical acceptable percent of 5xx errors"
  default     = 20
}

variable "http_5xx_requests_threshold_warning" {
  description = "Maximum warning acceptable percent of 5xx errors"
  default     = 10
}

#################################
###   HTTP 4xx status pages  ###
#################################

variable "http_4xx_requests_threshold_critical" {
  description = "Maximum critical acceptable percent of 4xx errors"
  default     = 30
}

variable "http_4xx_requests_threshold_warning" {
  description = "Maximum warning acceptable percent of 4xx errors"
  default     = 15
}

#################################
###   HTTP 2xx status pages  ###
#################################

# Lower-bound monitor: the warning threshold (95) is intentionally HIGHER
# than the critical one (90) because the alert fires when the 2xx percentage
# drops BELOW the threshold.
variable "http_2xx_requests_threshold_critical" {
  description = "Minimum critical acceptable percent of 2xx requests"
  default     = 90
}

variable "http_2xx_requests_threshold_warning" {
  description = "Minimum warning acceptable percent of 2xx requests"
  default     = 95
}

View File

@ -0,0 +1,155 @@
# Builds the Datadog tag filter: default convention tags or a custom filter
# string when filter_tags_use_defaults is "false".
# NOTE(review): the default filter uses "dd_azure_storage" although this is
# the App Services module — sibling modules use their own tag
# (dd_azure_apimanagement, dd_azure_eventhub). Looks like a copy-paste from
# the storage module; confirm the intended tag before changing, as existing
# resources may already be tagged dd_azure_storage.
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
# Monitoring App Services response time: average response time over 5
# minutes, warning/critical when above the configured thresholds (seconds).
resource "datadog_monitor" "appservices_response_time" {
  # Typo fixed in the user-facing monitor title: "is to high" -> "is too high".
  name    = "[${var.environment}] App Services response time of {{value}}s is too high on {{name}}"
  type    = "metric alert"
  message = "${var.message}"

  query = <<EOF
avg(last_5m): (
avg:azure.app_services.average_response_time{${data.template_file.filter.rendered}} by {resource_group,region,name}
) > ${var.response_time_threshold_critical}
EOF

  # Azure metrics arrive late; delay evaluation to avoid false alerts.
  evaluation_delay = "${var.delay}"
  new_host_delay   = "${var.delay}"

  thresholds {
    warning  = "${var.response_time_threshold_warning}"
    critical = "${var.response_time_threshold_critical}"
  }

  notify_no_data      = true # Will notify when no data is received
  renotify_interval   = 0
  require_full_window = true
  timeout_h           = 0
  include_tags        = true

  tags = ["env:${var.environment}", "resource:appservices", "team:azure", "provider:azure"]
}
# Monitoring App Services memory usage
# Compares memory_working_set (bytes) to memory_usage_threshold_critical.
# NOTE(review): the title divides by 1000000 (MB) while labelling the result
# "MiB" (1 MiB = 1048576 bytes); the displayed figure slightly overstates the
# MiB value — confirm intended unit.
resource "datadog_monitor" "appservices_memory_usage_count" {
name = "[${var.environment}] App Services memory usage > ${ceil(var.memory_usage_threshold_critical/1000000)}MiB on {{name}}"
type = "metric alert"
message = "${var.message}"
query = <<EOF
avg(last_5m): (
avg:azure.app_services.memory_working_set{${data.template_file.filter.rendered}} by {resource_group,region,name}
) > ${var.memory_usage_threshold_critical}
EOF
evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}"
thresholds {
warning = "${var.memory_usage_threshold_warning}"
critical = "${var.memory_usage_threshold_critical}"
}
notify_no_data = true # Will notify when no data is received
renotify_interval = 0
require_full_window = true
timeout_h = 0
include_tags = true
tags = ["env:${var.environment}", "resource:appservices", "team:azure", "provider:azure"]
}
# Monitoring App Services 5xx errors percent
# Ratio http5xx / requests * 100 over 5 minutes; critical when above the
# threshold. No-data is not alerted since an idle app serves no requests.
resource "datadog_monitor" "appservices_http_5xx_errors_count" {
name = "[${var.environment}] App Services HTTP 5xx errors is {{value}}% above the limit on {{name}}"
type = "metric alert"
message = "${var.message}"
query = <<EOF
sum(last_5m): (
avg:azure.app_services.http5xx{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.app_services.requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
) * 100 > ${var.http_5xx_requests_threshold_critical}
EOF
evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}"
thresholds {
warning = "${var.http_5xx_requests_threshold_warning}"
critical = "${var.http_5xx_requests_threshold_critical}"
}
notify_no_data = false # Will NOT notify when no data is received
renotify_interval = 0
require_full_window = true
timeout_h = 0
include_tags = true
tags = ["env:${var.environment}", "resource:appservices", "team:azure", "provider:azure"]
}
# Monitoring App Services 4xx errors percent
# Ratio http4xx / requests * 100 over 5 minutes; critical when above the
# threshold. Same shape as the 5xx monitor above.
resource "datadog_monitor" "appservices_http_4xx_errors_count" {
name = "[${var.environment}] App Services HTTP 4xx errors is {{value}}% above the limit on {{name}}"
type = "metric alert"
message = "${var.message}"
query = <<EOF
sum(last_5m): (
avg:azure.app_services.http4xx{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.app_services.requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
) * 100 > ${var.http_4xx_requests_threshold_critical}
EOF
evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}"
thresholds {
warning = "${var.http_4xx_requests_threshold_warning}"
critical = "${var.http_4xx_requests_threshold_critical}"
}
notify_no_data = false # Will NOT notify when no data is received
renotify_interval = 0
require_full_window = true
timeout_h = 0
include_tags = true
tags = ["env:${var.environment}", "resource:appservices", "team:azure", "provider:azure"]
}
# Monitoring App Services HTTP 2xx status pages percent
# Lower-bound monitor: ratio http2xx / requests * 100 over 5 minutes,
# critical when it drops BELOW the threshold.
resource "datadog_monitor" "appservices_http_2xx_status_rate" {
name = "[${var.environment}] App Services HTTP 2xx responses is {{value}}% below the limit on {{name}}"
type = "metric alert"
message = "${var.message}"
query = <<EOF
sum(last_5m): (
avg:azure.app_services.http2xx{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.app_services.requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
) * 100 < ${var.http_2xx_requests_threshold_critical}
EOF
evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}"
thresholds {
warning = "${var.http_2xx_requests_threshold_warning}"
critical = "${var.http_2xx_requests_threshold_critical}"
}
notify_no_data = false # Will NOT notify when no data is received (comment fixed: it previously contradicted the setting)
renotify_interval = 0
require_full_window = true
timeout_h = 0
include_tags = true
tags = ["env:${var.environment}", "resource:appservices", "team:azure", "provider:azure"]
}

View File

@ -0,0 +1,54 @@
Event Hub Datadog monitor
=========================
How to use this module
----------------------
```
module "datadog-monitors-azure-eventhub" {
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/eventhub?ref={revision}"
message = "${module.datadog-message-alerting.alerting-message}"
environment = "${var.environment}"
subscription_id = "${var.subscription_id}"
}
```
Purpose
-------
Creates Datadog monitors with the following checks:
* Service status check
* Failed request ratio
* Erroneous requests ratio
Inputs
------
| Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:|
| delay | Delay in seconds for the metric evaluation | string | `600` | no |
| environment | Architecture environment | string | - | yes |
| errors_rate_thresold_critical | Errors ratio (percentage) to trigger the critical alert | string | `3` | no |
| errors_rate_thresold_warning | Errors ratio (percentage) to trigger a warning alert | string | `1` | no |
| failed_requests_rate_thresold_critical | Failed requests ratio (percentage) to trigger the critical alert | string | `3` | no |
| failed_requests_rate_thresold_warning | Failed requests ratio (percentage) to trigger a warning alert | string | `1` | no |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| message | Message sent when an alert is triggered | string | - | yes |
Outputs
-------
| Name | Description |
|------|-------------|
| errors_monitor_id | Id of the `errors` monitor |
| failed_requests_monitor_id | Id of the `failed requests` monitor |
| status_monitor_id | Id of the `status` monitor |
Related documentation
---------------------
Datadog documentation : [https://docs.datadoghq.com/integrations/azure_event_hub/](https://docs.datadoghq.com/integrations/azure_event_hub/)
Azure metrics documentation : [https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-metrics-azure-monitor](https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-metrics-azure-monitor)

View File

@ -0,0 +1,45 @@
# Global Terraform
variable "environment" {
description = "Architecture environment"
type = "string"
}
# Global DataDog
variable "message" {
description = "Message sent when an alert is triggered"
}
variable "delay" {
description = "Delay in seconds for the metric evaluation"
default = 600
}
variable "filter_tags_use_defaults" {
description = "Use default filter tags convention"
default = "true"
}
variable "filter_tags_custom" {
description = "Tags used for custom filtering when filter_tags_use_defaults is false"
default = "*"
}
# Azure Event Hub specific thresholds (percentages).
# NOTE(review): "thresold" is a misspelling of "threshold" in the four
# variable names below; kept as-is because renaming would break every module
# caller that sets these inputs.
variable "failed_requests_rate_thresold_critical" {
description = "Failed requests ratio (percentage) to trigger the critical alert"
default = 3
}
variable "failed_requests_rate_thresold_warning" {
description = "Failed requests ratio (percentage) to trigger a warning alert"
default = 1
}
variable "errors_rate_thresold_critical" {
description = "Errors ratio (percentage) to trigger the critical alert"
default = 3
}
variable "errors_rate_thresold_warning" {
description = "Errors ratio (percentage) to trigger a warning alert"
default = 1
}

View File

@ -0,0 +1,100 @@
# Builds the Datadog tag filter for the Event Hub monitors: default
# convention tags (dd_monitoring / dd_azure_eventhub / env) or the custom
# filter string when filter_tags_use_defaults is "false".
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_eventhub:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
# Alerts when the Event Hub namespace status metric is anything other than 1
# (healthy) over the last 5 minutes, per resource_group/region/name.
resource "datadog_monitor" "eventhub_status" {
name = "[${var.environment}] Event Hub status is not ok on {{name}}"
message = "${var.message}"
query = <<EOF
avg(last_5m): avg:azure.eventhub_namespaces.status{${data.template_file.filter.rendered}} by {resource_group,region,name} != 1
EOF
type = "metric alert"
# Status is an availability signal: silence is itself a problem, so no-data
# alerts after no_data_timeframe (20 minutes) below.
notify_no_data = true
# evaluation_delay / new_host_delay buffer the Azure -> Datadog metric lag.
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:eventhub", "team:azure", "provider:azure"]
}
# Alerts when the failed-request ratio of an Event Hub namespace exceeds the
# configured thresholds: failed / (successful + failed) * 100 over the last
# 5 minutes, per resource_group/region/name.
resource "datadog_monitor" "eventhub_failed_requests" {
  # Fixed grammar in the monitor title: "too much" -> "too many".
  name    = "[${var.environment}] Event Hub too many failed requests on {{name}}"
  message = "${var.message}"

  query = <<EOF
sum(last_5m): (
avg:azure.eventhub_namespaces.failed_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() ) /
(
avg:azure.eventhub_namespaces.successful_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
avg:azure.eventhub_namespaces.failed_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
) * 100 > ${var.failed_requests_rate_thresold_critical}
EOF

  type = "metric alert"

  # The critical value must match the one embedded in the query above.
  thresholds {
    critical = "${var.failed_requests_rate_thresold_critical}"
    warning  = "${var.failed_requests_rate_thresold_warning}"
  }

  # A quiet namespace legitimately produces no request datapoints, so no-data
  # does not alert here (unlike the status monitor).
  notify_no_data      = false
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  tags = ["env:${var.environment}", "resource:eventhub", "team:azure", "provider:azure"]
}
# Alerts when the server-side error ratio of an Event Hub namespace exceeds
# the configured thresholds: (internal + busy + other errors) over total
# (successes + all errors) * 100, over the last 5 minutes.
resource "datadog_monitor" "eventhub_errors" {
  # Fixed grammar in the monitor title: "too much" -> "too many".
  name    = "[${var.environment}] Event Hub too many errors on {{name}}"
  message = "${var.message}"

  query = <<EOF
sum(last_5m): (
avg:azure.eventhub_namespaces.internal_server_errors{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
avg:azure.eventhub_namespaces.server_busy_errors{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
avg:azure.eventhub_namespaces.other_errors{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
) / (
avg:azure.eventhub_namespaces.successful_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
avg:azure.eventhub_namespaces.internal_server_errors{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
avg:azure.eventhub_namespaces.server_busy_errors{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
avg:azure.eventhub_namespaces.other_errors{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
) * 100 > ${var.errors_rate_thresold_critical}
EOF

  type = "metric alert"

  # The critical value must match the one embedded in the query above.
  thresholds {
    critical = "${var.errors_rate_thresold_critical}"
    warning  = "${var.errors_rate_thresold_warning}"
  }

  # No traffic means no datapoints; no-data does not alert for ratio monitors.
  notify_no_data      = false
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  tags = ["env:${var.environment}", "resource:eventhub", "team:azure", "provider:azure"]
}

View File

@ -0,0 +1,11 @@
# BUG FIX: the original wiring was swapped — status_monitor_id exposed the
# failed-requests monitor id and failed_requests_monitor_id exposed the status
# monitor id. Each output now returns the id of the monitor it is named after.
output "status_monitor_id" {
  value = "${datadog_monitor.eventhub_status.id}"
}

output "failed_requests_monitor_id" {
  value = "${datadog_monitor.eventhub_failed_requests.id}"
}

output "errors_monitor_id" {
  value = "${datadog_monitor.eventhub_errors.id}"
}

397
cloud/azure/inputs.tf Normal file
View File

@ -0,0 +1,397 @@
# Shared inputs for the aggregated cloud/azure module; values are forwarded to
# the per-service sub-modules (App Services, SQL, Redis, Event Hub, ...).
variable "environment" {
description = "Architecture environment"
type = "string"
}
variable "message" {
description = "Message sent when a monitor is triggered"
type = "string"
}
variable "delay" {
description = "Delay in seconds for the metric evaluation"
default = 600
}
variable "filter_tags_use_defaults" {
description = "Use default filter tags convention"
default = "true"
}
variable "filter_tags_custom" {
description = "Tags used for custom filtering when filter_tags_use_defaults is false"
default = "*"
}
# Used by sub-modules monitoring Azure resources that cannot carry tags.
variable "non_taggable_filter_tags" {
description = "Tags used for filtering for components without tag support"
default = "*"
}
# Azure API Management specific variables
variable "apimanagement_failed_requests_threshold_critical" {
description = "Maximum acceptable percent of failed requests"
default = 5
}
variable "apimanagement_other_requests_threshold_critical" {
description = "Maximum acceptable percent of other requests"
default = 5
}
variable "apimanagement_unauthorized_requests_threshold_critical" {
description = "Maximum acceptable percent of unauthorized requests"
default = 5
}
variable "apimanagement_successful_requests_threshold_critical" {
description = "Minimum acceptable percent of successful requests"
default = 90
}
# Azure App Services specific variables
variable "appservices_response_time_threshold_critical" {
default = 0.8
description = "Alerting threshold in seconds"
}
variable "appservices_response_time_threshold_warning" {
default = 0.4
description = "Warning threshold in seconds"
}
# NOTE(review): the two memory defaults below look like byte counts
# (~50 MiB and ~32 MiB) while the descriptions say "Mib"/"MiB" — confirm
# the unit against the azure.app_services memory metric before tuning.
variable "appservices_memory_usage_threshold_critical" {
default = 52430000
description = "Alerting threshold in Mib"
}
variable "appservices_memory_usage_threshold_warning" {
default = 33550000
description = "Warning threshold in MiB"
}
variable "appservices_http_4xx_requests_threshold_critical" {
default = 30
description = "Maximum critical acceptable percent of 4xx errors"
}
variable "appservices_http_4xx_requests_threshold_warning" {
default = 15
description = "Maximum warning acceptable percent of 4xx errors"
}
variable "appservices_http_5xx_requests_threshold_critical" {
default = 20
description = "Maximum critical acceptable percent of 5xx errors"
}
variable "appservices_http_5xx_requests_threshold_warning" {
default = 10
description = "Maximum warning acceptable percent of 5xx errors"
}
variable "appservices_http_2xx_requests_threshold_critical" {
default = 90
description = "Minimum critical acceptable percent of 2xx requests"
}
variable "appservices_http_2xx_requests_threshold_warning" {
default = 95
description = "Minimum warning acceptable percent of 2xx requests"
}
# Azure Event Hub specific variables
# NOTE(review): "thresold" is a typo for "threshold" in the four names below,
# mirroring the eventhub sub-module's own inputs; renaming is a breaking
# change for callers, so the typo is only flagged here.
variable "eventhub_failed_requests_rate_thresold_critical" {
description = "Failed requests ratio (percentage) to trigger the critical alert"
default = 3
}
variable "eventhub_failed_requests_rate_thresold_warning" {
description = "Failed requests ratio (percentage) to trigger a warning alert"
default = 1
}
variable "eventhub_errors_rate_thresold_critical" {
description = "Errors ratio (percentage) to trigger the critical alert"
default = 3
}
variable "eventhub_errors_rate_thresold_warning" {
description = "Errors ratio (percentage) to trigger a warning alert"
default = 1
}
# IOT Hub specific variables
# All *_rate_* values are failure percentages (0-100); all *_egress_* values
# are raw message counts over the evaluation window.
variable "iothub_failed_jobs_rate_threshold_warning" {
description = "Jobs Failed rate limit (warning threshold)"
default = 0
}
variable "iothub_failed_jobs_rate_threshold_critical" {
description = "Jobs Failed rate limit (critical threshold)"
default = 10
}
variable "iothub_failed_listjobs_rate_threshold_warning" {
description = "ListJobs Failed rate limit (warning threshold)"
default = 0
}
variable "iothub_failed_listjobs_rate_threshold_critical" {
description = "ListJobs Failed rate limit (critical threshold)"
default = 10
}
variable "iothub_failed_queryjobs_rate_threshold_warning" {
description = "QueryJobs Failed rate limit (warning threshold)"
default = 0
}
variable "iothub_failed_queryjobs_rate_threshold_critical" {
description = "QueryJobs Failed rate limit (critical threshold)"
default = 10
}
variable "iothub_failed_c2d_methods_rate_threshold_warning" {
description = "C2D Methods Failed rate limit (warning threshold)"
default = 0
}
variable "iothub_failed_c2d_methods_rate_threshold_critical" {
description = "C2D Methods Failed rate limit (critical threshold)"
default = 10
}
variable "iothub_failed_c2d_twin_read_rate_threshold_warning" {
description = "C2D Twin Read Failed rate limit (warning threshold)"
default = 0
}
variable "iothub_failed_c2d_twin_read_rate_threshold_critical" {
description = "C2D Twin Read Failed rate limit (critical threshold)"
default = 10
}
variable "iothub_failed_c2d_twin_update_rate_threshold_warning" {
description = "C2D Twin Update Failed rate limit (warning threshold)"
default = 0
}
variable "iothub_failed_c2d_twin_update_rate_threshold_critical" {
description = "C2D Twin Update Failed rate limit (critical threshold)"
default = 10
}
variable "iothub_failed_d2c_twin_read_rate_threshold_warning" {
description = "D2C Twin Read Failed rate limit (warning threshold)"
default = 0
}
variable "iothub_failed_d2c_twin_read_rate_threshold_critical" {
description = "D2C Twin Read Failed rate limit (critical threshold)"
default = 10
}
variable "iothub_failed_d2c_twin_update_rate_threshold_warning" {
description = "D2C Twin Update Failed rate limit (warning threshold)"
default = 0
}
variable "iothub_failed_d2c_twin_update_rate_threshold_critical" {
description = "D2C Twin Update Failed rate limit (critical threshold)"
default = 10
}
variable "iothub_dropped_d2c_telemetry_egress_threshold_warning" {
description = "D2C Telemetry Dropped limit (warning threshold)"
default = 500
}
variable "iothub_dropped_d2c_telemetry_egress_threshold_critical" {
description = "D2C Telemetry Dropped limit (critical threshold)"
default = 1000
}
variable "iothub_orphaned_d2c_telemetry_egress_threshold_warning" {
description = "D2C Telemetry Orphaned limit (warning threshold)"
default = 500
}
variable "iothub_orphaned_d2c_telemetry_egress_threshold_critical" {
description = "D2C Telemetry Orphaned limit (critical threshold)"
default = 1000
}
variable "iothub_invalid_d2c_telemetry_egress_threshold_warning" {
description = "D2C Telemetry Invalid limit (warning threshold)"
default = 500
}
variable "iothub_invalid_d2c_telemetry_egress_threshold_critical" {
description = "D2C Telemetry Invalid limit (critical threshold)"
default = 1000
}
variable "iothub_fallback_d2c_telemetry_egress_threshold_warning" {
description = "D2C Telemetry Fallback limit (warning threshold)"
default = 500
}
variable "iothub_fallback_d2c_telemetry_egress_threshold_critical" {
description = "D2C Telemetry Fallback limit (critical threshold)"
default = 1000
}
# Azure Redis specific variables
variable "redis_evictedkeys_limit_threshold_warning" {
description = "Evicted keys limit (warning threshold)"
default = 0
}
variable "redis_evictedkeys_limit_threshold_critical" {
description = "Evicted keys limit (critical threshold)"
default = 100
}
variable "redis_percent_processor_time_threshold_critical" {
description = "Processor time percent (critical threshold)"
default = 80
}
variable "redis_percent_processor_time_threshold_warning" {
description = "Processor time percent (warning threshold)"
default = 60
}
variable "redis_server_load_rate_threshold_critical" {
description = "Server CPU load rate (critical threshold)"
default = 90
}
variable "redis_server_load_rate_threshold_warning" {
description = "Server CPU load rate (warning threshold)"
default = 70
}
# Azure SQL Database specific variables
# NOTE(review): these defaults are strings while the sections above use bare
# numbers; Terraform coerces both, but pick one style when next touched.
variable "sqldatabase_cpu_threshold_warning" {
description = "CPU usage in percent (warning threshold)"
default = "80"
}
variable "sqldatabase_cpu_threshold_critical" {
description = "CPU usage in percent (critical threshold)"
default = "90"
}
variable "sqldatabase_diskspace_threshold_warning" {
description = "Disk space used in percent (warning threshold)"
default = "80"
}
variable "sqldatabase_diskspace_threshold_critical" {
description = "Disk space used in percent (critical threshold)"
default = "90"
}
variable "sqldatabase_dtu_threshold_warning" {
description = "Amount of DTU used (warning threshold)"
default = "85"
}
variable "sqldatabase_dtu_threshold_critical" {
description = "Amount of DTU used (critical threshold)"
default = "90"
}
variable "sqldatabase_deadlock_threshold_critical" {
description = "Amount of Deadlocks (critical threshold)"
default = "1"
}
# Azure Storage specific variables
variable "storage_availability_threshold_critical" {
description = "Minimum acceptable percent of availability for a storage"
default = 90
}
variable "storage_successful_requests_threshold_critical" {
description = "Minimum acceptable percent of successful requests for a storage"
default = 90
}
variable "storage_latency_threshold_critical" {
description = "Maximum acceptable end to end latency (ms) for a storage"
default = 1000
}
variable "storage_timeout_error_requests_threshold_critical" {
description = "Maximum acceptable percent of timeout error requests for a storage"
default = 5
}
variable "storage_network_error_requests_threshold_critical" {
description = "Maximum acceptable percent of network error requests for a storage"
default = 5
}
variable "storage_throttling_error_requests_threshold_critical" {
description = "Maximum acceptable percent of throttling error requests for a storage"
default = 10
}
variable "storage_server_other_error_requests_threshold_critical" {
description = "Maximum acceptable percent of server other error requests for a storage"
default = 10
}
variable "storage_client_other_error_requests_threshold_critical" {
description = "Maximum acceptable percent of client other error requests for a storage"
default = 15
}
variable "storage_authorization_error_requests_threshold_critical" {
description = "Maximum acceptable percent of authorization error requests for a storage"
default = 15
}
# Azure Stream Analytics specific variables
variable "streamanalytics_su_utilization_threshold_warning" {
description = "Streaming Unit utilization rate limit (warning threshold)"
default = 60
}
variable "streamanalytics_su_utilization_threshold_critical" {
description = "Streaming Unit utilization rate limit (critical threshold)"
default = 80
}
# NOTE(review): this name is inconsistent with its critical counterpart
# (streamanalytics_failed_function_requests_threshold_critical, missing
# "failed_"); renaming would break callers, so it is only flagged here.
variable "streamanalytics_function_requests_threshold_warning" {
description = "Failed Function Request rate limit (warning threshold)"
default = 0
}
variable "streamanalytics_failed_function_requests_threshold_critical" {
description = "Failed Function Request rate limit (critical threshold)"
default = 10
}
variable "streamanalytics_conversion_errors_threshold_warning" {
description = "Conversion errors limit (warning threshold)"
default = 0
}
variable "streamanalytics_conversion_errors_threshold_critical" {
description = "Conversion errors limit (critical threshold)"
default = 10
}
variable "streamanalytics_runtime_errors_threshold_warning" {
description = "Runtime errors limit (warning threshold)"
default = 0
}
variable "streamanalytics_runtime_errors_threshold_critical" {
description = "Runtime errors limit (critical threshold)"
default = 10
}

View File

@ -0,0 +1,75 @@
Azure IOT Hubs DataDog monitors
===============================
How to use this module
----------------------
```
module "iothubs" {
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/iothubs?ref=MON-80-azure-hub-iot-monitors"
message = "${module.datadog-message-alerting.alerting-message}"
environment = "${var.environment}"
}
```
Purpose
-------
Creates DataDog monitors with the following checks:
* Service status check
* Jobs failed average check
* Query Jobs failed average check
* List Jobs failed average check
* Total devices count check
* C2D methods failed average check
* C2D twin read failed average check
* C2D twin update failed average check
* D2C twin read failed average check
* D2C twin update failed average check
* D2C telemetry egress dropped count check
* D2C telemetry egress orphaned count check
* D2C telemetry egress invalid count check
* D2C telemetry egress fallback count check
D2C telemetry ingress not sent count check
Inputs
------
| Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:|
| delay | Delay in seconds for the metric evaluation | string | `600` | no |
| dropped_d2c_telemetry_egress_threshold_critical | D2C Telemetry Dropped limit (critical threshold) | string | `1000` | no |
| dropped_d2c_telemetry_egress_threshold_warning | D2C Telemetry Dropped limit (warning threshold) | string | `500` | no |
| environment | Architecture Environment | string | - | yes |
| failed_c2d_methods_rate_threshold_critical | C2D Methods Failed rate limit (critical threshold) | string | `10` | no |
| failed_c2d_methods_rate_threshold_warning | C2D Methods Failed rate limit (warning threshold) | string | `0` | no |
| failed_c2d_twin_read_rate_threshold_critical | C2D Twin Read Failed rate limit (critical threshold) | string | `10` | no |
| failed_c2d_twin_read_rate_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `0` | no |
| failed_c2d_twin_update_rate_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `10` | no |
| failed_c2d_twin_update_rate_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `0` | no |
| failed_d2c_twin_read_rate_threshold_critical | D2C Twin Read Failed rate limit (critical threshold) | string | `10` | no |
| failed_d2c_twin_read_rate_threshold_warning | D2C Twin Read Failed rate limit (warning threshold) | string | `0` | no |
| failed_d2c_twin_update_rate_threshold_critical | D2C Twin Update Failed rate limit (critical threshold) | string | `10` | no |
| failed_d2c_twin_update_rate_threshold_warning | D2C Twin Update Failed rate limit (warning threshold) | string | `0` | no |
| failed_jobs_rate_threshold_critical | Jobs Failed rate limit (critical threshold) | string | `10` | no |
| failed_jobs_rate_threshold_warning | Jobs Failed rate limit (warning threshold) | string | `0` | no |
| failed_listjobs_rate_threshold_critical | ListJobs Failed rate limit (critical threshold) | string | `10` | no |
| failed_listjobs_rate_threshold_warning | ListJobs Failed rate limit (warning threshold) | string | `0` | no |
| failed_queryjobs_rate_threshold_critical | QueryJobs Failed rate limit (critical threshold) | string | `10` | no |
| failed_queryjobs_rate_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `0` | no |
| fallback_d2c_telemetry_egress_threshold_critical | D2C Telemetry Fallback limit (critical threshold) | string | `1000` | no |
| fallback_d2c_telemetry_egress_threshold_warning | D2C Telemetry Fallback limit (warning threshold) | string | `500` | no |
| filter_tags | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| invalid_d2c_telemetry_egress_threshold_critical | D2C Telemetry Invalid limit (critical threshold) | string | `1000` | no |
| invalid_d2c_telemetry_egress_threshold_warning | D2C Telemetry Invalid limit (warning threshold) | string | `500` | no |
| message | Message sent when an alert is triggered | string | - | yes |
| orphaned_d2c_telemetry_egress_threshold_critical | D2C Telemetry Orphaned limit (critical threshold) | string | `1000` | no |
| orphaned_d2c_telemetry_egress_threshold_warning | D2C Telemetry Orphaned limit (warning threshold) | string | `500` | no |
Related documentation
---------------------
DataDog documentation: [https://docs.datadoghq.com/integrations/azure_iot_hub](https://docs.datadoghq.com/integrations/azure_iot_hub)
Azure IOT Hubs metrics documentation: [https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health](https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health)

View File

@ -0,0 +1,141 @@
# Global Terraform
variable "environment" {
description = "Architecture Environment"
type = "string"
}
# Global DataDog
variable "delay" {
description = "Delay in seconds for the metric evaluation"
default = 600
}
variable "message" {
description = "Message sent when an alert is triggered"
}
# Unlike the aggregate cloud/azure module, this sub-module takes a raw Datadog
# tag filter string directly (no filter_tags_use_defaults toggle here).
variable "filter_tags" {
description = "Tags used for filtering"
default = "*"
}
# Azure IOT hubs specific
# All *_rate_* values are failure percentages (0-100); all *_egress_* values
# are raw message counts over the evaluation window.
variable "failed_jobs_rate_threshold_warning" {
description = "Jobs Failed rate limit (warning threshold)"
default = 0
}
variable "failed_jobs_rate_threshold_critical" {
description = "Jobs Failed rate limit (critical threshold)"
default = 10
}
variable "failed_listjobs_rate_threshold_warning" {
description = "ListJobs Failed rate limit (warning threshold)"
default = 0
}
variable "failed_listjobs_rate_threshold_critical" {
description = "ListJobs Failed rate limit (critical threshold)"
default = 10
}
variable "failed_queryjobs_rate_threshold_warning" {
description = "QueryJobs Failed rate limit (warning threshold)"
default = 0
}
variable "failed_queryjobs_rate_threshold_critical" {
description = "QueryJobs Failed rate limit (critical threshold)"
default = 10
}
variable "failed_c2d_methods_rate_threshold_warning" {
description = "C2D Methods Failed rate limit (warning threshold)"
default = 0
}
variable "failed_c2d_methods_rate_threshold_critical" {
description = "C2D Methods Failed rate limit (critical threshold)"
default = 10
}
variable "failed_c2d_twin_read_rate_threshold_warning" {
description = "C2D Twin Read Failed rate limit (warning threshold)"
default = 0
}
variable "failed_c2d_twin_read_rate_threshold_critical" {
description = "C2D Twin Read Failed rate limit (critical threshold)"
default = 10
}
variable "failed_c2d_twin_update_rate_threshold_warning" {
description = "C2D Twin Update Failed rate limit (warning threshold)"
default = 0
}
variable "failed_c2d_twin_update_rate_threshold_critical" {
description = "C2D Twin Update Failed rate limit (critical threshold)"
default = 10
}
variable "failed_d2c_twin_read_rate_threshold_warning" {
description = "D2C Twin Read Failed rate limit (warning threshold)"
default = 0
}
variable "failed_d2c_twin_read_rate_threshold_critical" {
description = "D2C Twin Read Failed rate limit (critical threshold)"
default = 10
}
variable "failed_d2c_twin_update_rate_threshold_warning" {
description = "D2C Twin Update Failed rate limit (warning threshold)"
default = 0
}
variable "failed_d2c_twin_update_rate_threshold_critical" {
description = "D2C Twin Update Failed rate limit (critical threshold)"
default = 10
}
variable "dropped_d2c_telemetry_egress_threshold_warning" {
description = "D2C Telemetry Dropped limit (warning threshold)"
default = 500
}
variable "dropped_d2c_telemetry_egress_threshold_critical" {
description = "D2C Telemetry Dropped limit (critical threshold)"
default = 1000
}
variable "orphaned_d2c_telemetry_egress_threshold_warning" {
description = "D2C Telemetry Orphaned limit (warning threshold)"
default = 500
}
variable "orphaned_d2c_telemetry_egress_threshold_critical" {
description = "D2C Telemetry Orphaned limit (critical threshold)"
default = 1000
}
variable "invalid_d2c_telemetry_egress_threshold_warning" {
description = "D2C Telemetry Invalid limit (warning threshold)"
default = 500
}
variable "invalid_d2c_telemetry_egress_threshold_critical" {
description = "D2C Telemetry Invalid limit (critical threshold)"
default = 1000
}
variable "fallback_d2c_telemetry_egress_threshold_warning" {
description = "D2C Telemetry Fallback limit (warning threshold)"
default = 500
}
variable "fallback_d2c_telemetry_egress_threshold_critical" {
description = "D2C Telemetry Fallback limit (critical threshold)"
default = 1000
}

View File

@ -0,0 +1,462 @@
# Ratio-based alert: failed / (failed + completed) IOT Hub jobs over the last
# 5 minutes, as a percentage, grouped by resource_group/region/name.
resource "datadog_monitor" "too_many_jobs_failed" {
  name    = "[${var.environment}] IOT Hub Too many jobs failed on {{name}}"
  message = "${var.message}"
  type    = "metric alert"

  query = <<EOF
sum(last_5m):(
avg:azure.devices_iothubs.jobs.failed{${var.filter_tags}} by {resource_group,region,name}.as_count() /
( avg:azure.devices_iothubs.jobs.failed{${var.filter_tags}} by {resource_group,region,name}.as_count() +
avg:azure.devices_iothubs.jobs.completed{${var.filter_tags}} by {resource_group,region,name}.as_count() )
) * 100 > ${var.failed_jobs_rate_threshold_critical}
EOF

  thresholds {
    critical = "${var.failed_jobs_rate_threshold_critical}"
    warning  = "${var.failed_jobs_rate_threshold_warning}"
  }

  # Buffer for Azure -> Datadog metric ingestion latency.
  evaluation_delay = "${var.delay}"
  new_host_delay   = "${var.delay}"

  # No jobs means no datapoints; no-data is therefore not an alert condition.
  notify_no_data    = false
  no_data_timeframe = 20

  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true

  tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}
# Ratio-based alert: list_jobs failures / (successes + failures) over the
# last 5 minutes, expressed as a percentage.
resource "datadog_monitor" "too_many_list_jobs_failed" {
  # Fixed grammar in the monitor title: "failure" -> "failures".
  name    = "[${var.environment}] IOT Hub Too many list_jobs failures on {{name}}"
  message = "${var.message}"

  # NOTE(review): unlike the sibling monitors, this query groups by
  # {resource_group,name} without region — confirm whether region should be
  # added for consistent alert granularity.
  query = <<EOF
sum(last_5m):(
avg:azure.devices_iothubs.jobs.list_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_count() /
( avg:azure.devices_iothubs.jobs.list_jobs.success{${var.filter_tags}} by {resource_group,name}.as_count() +
avg:azure.devices_iothubs.jobs.list_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_count() )
) * 100 > ${var.failed_listjobs_rate_threshold_critical}
EOF

  type = "metric alert"

  # The critical value must match the one embedded in the query above.
  thresholds {
    warning  = "${var.failed_listjobs_rate_threshold_warning}"
    critical = "${var.failed_listjobs_rate_threshold_critical}"
  }

  # No ListJobs traffic means no datapoints; no-data does not alert.
  notify_no_data      = false
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}
# Ratio-based alert: query_jobs failures / (successes + failures) over the
# last 5 minutes, expressed as a percentage.
resource "datadog_monitor" "too_many_query_jobs_failed" {
  # Fixed grammar in the monitor title: "failed" phrasing kept, pluralized
  # consistently with the other failure monitors.
  name    = "[${var.environment}] IOT Hub Too many query_jobs failed on {{name}}"
  message = "${var.message}"

  # NOTE(review): groups by {resource_group,name} without region, unlike most
  # sibling monitors — confirm whether region should be added for consistency.
  query = <<EOF
sum(last_5m):(
avg:azure.devices_iothubs.jobs.query_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_count() /
( avg:azure.devices_iothubs.jobs.query_jobs.success{${var.filter_tags}} by {resource_group,name}.as_count() +
avg:azure.devices_iothubs.jobs.query_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_count() )
) * 100 > ${var.failed_queryjobs_rate_threshold_critical}
EOF

  type = "metric alert"

  # The critical value must match the one embedded in the query above.
  thresholds {
    warning  = "${var.failed_queryjobs_rate_threshold_warning}"
    critical = "${var.failed_queryjobs_rate_threshold_critical}"
  }

  # No QueryJobs traffic means no datapoints; no-data does not alert.
  notify_no_data      = false
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}
# Availability check on the IOT Hub status gauge: any average below 1 over the
# last 5 minutes triggers the alert.
resource "datadog_monitor" "status" {
  name    = "[${var.environment}] IOT Hub Status is not ok on {{name}}"
  message = "${var.message}"
  type    = "metric alert"

  query = <<EOF
avg(last_5m):avg:azure.devices_iothubs.status{${var.filter_tags}} by {resource_group,region,name} < 1
EOF

  # Status should always be reported; 20 minutes of silence raises a
  # no-data alert.
  notify_no_data    = true
  no_data_timeframe = 20

  # Buffer for Azure -> Datadog metric ingestion latency.
  evaluation_delay = "${var.delay}"
  new_host_delay   = "${var.delay}"

  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true

  tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}
# Sanity check: alerts when an IOT Hub reports zero registered devices, which
# usually means the hub was wiped or device provisioning is broken.
resource "datadog_monitor" "total_devices" {
name = "[${var.environment}] IOT Hub Total devices is wrong on {{name}}"
message = "${var.message}"
query = <<EOF
avg(last_5m):avg:azure.devices_iothubs.devices.total_devices{${var.filter_tags}} by {resource_group,region,name} == 0
EOF
type = "metric alert"
# This gauge should always be reported, so missing data alerts as well
# (after no_data_timeframe = 20 minutes below).
notify_no_data = true
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}
# Ratio-based alert: failed cloud-to-device direct method calls versus total
# (failure + success) over the last 5 minutes, as a percentage.
resource "datadog_monitor" "too_many_c2d_methods_failed" {
name = "[${var.environment}] IOT Hub Too many c2d methods failure on {{name}}"
message = "${var.message}"
query = <<EOF
sum(last_5m):(
avg:azure.devices_iothubs.c2d.methods.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() /
( avg:azure.devices_iothubs.c2d.methods.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() +
avg:azure.devices_iothubs.c2d.methods.success{${var.filter_tags}} by {resource_group,region,name}.as_count() )
) * 100 > ${var.failed_c2d_methods_rate_threshold_critical}
EOF
type = "metric alert"
# The critical value must match the one embedded in the query above.
thresholds {
warning = "${var.failed_c2d_methods_rate_threshold_warning}"
critical = "${var.failed_c2d_methods_rate_threshold_critical}"
}
# No method traffic means no datapoints; no-data does not alert.
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}
# Ratio-based alert: failed cloud-to-device twin reads versus total
# (failure + success) over the last 5 minutes, as a percentage.
resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
name = "[${var.environment}] IOT Hub Too many c2d twin read failure on {{name}}"
message = "${var.message}"
query = <<EOF
sum(last_5m):(
avg:azure.devices_iothubs.c2d.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() /
( avg:azure.devices_iothubs.c2d.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() +
avg:azure.devices_iothubs.c2d.twin.read.success{${var.filter_tags}} by {resource_group,region,name}.as_count() )
) * 100 > ${var.failed_c2d_twin_read_rate_threshold_critical}
EOF
type = "metric alert"
# The critical value must match the one embedded in the query above.
thresholds {
warning = "${var.failed_c2d_twin_read_rate_threshold_warning}"
critical = "${var.failed_c2d_twin_read_rate_threshold_critical}"
}
# No twin-read traffic means no datapoints; no-data does not alert.
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}
# Ratio-based alert: failed cloud-to-device twin updates versus total
# (failure + success) over the last 5 minutes, as a percentage.
resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
name = "[${var.environment}] IOT Hub Too many c2d twin update failure on {{name}}"
message = "${var.message}"
query = <<EOF
sum(last_5m):(
avg:azure.devices_iothubs.c2d.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() /
( avg:azure.devices_iothubs.c2d.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() +
avg:azure.devices_iothubs.c2d.twin.update.success{${var.filter_tags}} by {resource_group,region,name}.as_count() )
) * 100 > ${var.failed_c2d_twin_update_rate_threshold_critical}
EOF
type = "metric alert"
# The critical value must match the one embedded in the query above.
thresholds {
warning = "${var.failed_c2d_twin_update_rate_threshold_warning}"
critical = "${var.failed_c2d_twin_update_rate_threshold_critical}"
}
# No twin-update traffic means no datapoints; no-data does not alert.
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}
# Ratio-based alert: failed device-to-cloud twin reads versus total
# (failure + success) over the last 5 minutes, as a percentage.
resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
name = "[${var.environment}] IOT Hub Too many d2c twin read failure on {{name}}"
message = "${var.message}"
query = <<EOF
sum(last_5m):(
avg:azure.devices_iothubs.d2c.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() /
( avg:azure.devices_iothubs.d2c.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() +
avg:azure.devices_iothubs.d2c.twin.read.success{${var.filter_tags}} by {resource_group,region,name}.as_count() )
) * 100 > ${var.failed_d2c_twin_read_rate_threshold_critical}
EOF
type = "metric alert"
# The critical value must match the one embedded in the query above.
thresholds {
warning = "${var.failed_d2c_twin_read_rate_threshold_warning}"
critical = "${var.failed_d2c_twin_read_rate_threshold_critical}"
}
# No twin-read traffic means no datapoints; no-data does not alert.
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}
# Alerts when the device-to-cloud (d2c) twin update failure rate
# (failures / (failures + successes), last 5 minutes, as a percentage)
# exceeds the critical threshold, per resource_group/region/hub name.
resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
name = "[${var.environment}] IOT Hub Too many d2c twin update failure on {{name}}"
message = "${var.message}"
query = <<EOF
sum(last_5m):(
avg:azure.devices_iothubs.d2c.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() /
( avg:azure.devices_iothubs.d2c.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() +
avg:azure.devices_iothubs.d2c.twin.update.success{${var.filter_tags}} by {resource_group,region,name}.as_count() )
) * 100 > ${var.failed_d2c_twin_update_rate_threshold_critical}
EOF
type = "metric alert"
thresholds {
warning = "${var.failed_d2c_twin_update_rate_threshold_warning}"
critical = "${var.failed_d2c_twin_update_rate_threshold_critical}"
}
# Missing data does not trigger this monitor.
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}
# Alerts when the count of d2c telemetry messages dropped on egress
# (no matching route / endpoint unavailable is not distinguished here)
# over the last 5 minutes exceeds the critical threshold.
resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
name = "[${var.environment}] IOT Hub Too many d2c telemetry egress dropped on {{name}}"
message = "${var.message}"
query = <<EOF
sum(last_5m): (
avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_count()
) > ${var.dropped_d2c_telemetry_egress_threshold_critical}
EOF
type = "metric alert"
thresholds {
warning = "${var.dropped_d2c_telemetry_egress_threshold_warning}"
critical = "${var.dropped_d2c_telemetry_egress_threshold_critical}"
}
# Missing data does not trigger this monitor.
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}
# Alerts when the count of orphaned d2c telemetry egress messages over the
# last 5 minutes exceeds the critical threshold, per
# resource_group/region/hub name.
resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
name = "[${var.environment}] IOT Hub Too many d2c telemetry egress orphaned on {{name}}"
message = "${var.message}"
query = <<EOF
sum(last_5m): (
avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_count()
) > ${var.orphaned_d2c_telemetry_egress_threshold_critical}
EOF
type = "metric alert"
thresholds {
warning = "${var.orphaned_d2c_telemetry_egress_threshold_warning}"
critical = "${var.orphaned_d2c_telemetry_egress_threshold_critical}"
}
# Missing data does not trigger this monitor.
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}
# Alerts when the count of invalid d2c telemetry egress messages over the
# last 5 minutes exceeds the critical threshold, per
# resource_group/region/hub name.
resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
name = "[${var.environment}] IOT Hub Too many d2c telemetry egress invalid on {{name}}"
message = "${var.message}"
query = <<EOF
sum(last_5m): (
avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_count()
) > ${var.invalid_d2c_telemetry_egress_threshold_critical}
EOF
type = "metric alert"
thresholds {
warning = "${var.invalid_d2c_telemetry_egress_threshold_warning}"
critical = "${var.invalid_d2c_telemetry_egress_threshold_critical}"
}
# Missing data does not trigger this monitor.
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}
# Alerts when the count of d2c telemetry messages routed to the fallback
# endpoint over the last 5 minutes exceeds the critical threshold, per
# resource_group/region/hub name.
resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" {
name = "[${var.environment}] IOT Hub Too many d2c telemetry egress fallback on {{name}}"
message = "${var.message}"
query = <<EOF
sum(last_5m): (
avg:azure.devices_iothubs.d2c.telemetry.egress.fallback{${var.filter_tags}} by {resource_group,region,name}.as_count()
) > ${var.fallback_d2c_telemetry_egress_threshold_critical}
EOF
type = "metric alert"
thresholds {
warning = "${var.fallback_d2c_telemetry_egress_threshold_warning}"
critical = "${var.fallback_d2c_telemetry_egress_threshold_critical}"
}
# Missing data does not trigger this monitor.
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}
# Alerts when telemetry messages were received on ingress (all_protocol)
# but not counted as successfully processed (success) in the last 5 minutes,
# i.e. the difference between the two counts is positive.
# NOTE(review): unlike the other monitors in this file, the threshold is
# hard-coded to 0 and there is no thresholds block — a single unprocessed
# message triggers the alert; confirm this sensitivity is intended.
resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" {
name = "[${var.environment}] IOT Hub Too many d2c telemetry ingress no sent on {{name}}"
message = "${var.message}"
query = <<EOF
sum(last_5m): (
avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol{${var.filter_tags}} by {resource_group,region,name}.as_count() -
avg:azure.devices_iothubs.d2c.telemetry.ingress.success{${var.filter_tags}} by {resource_group,region,name}.as_count()
) > 0
EOF
type = "metric alert"
# Missing data does not trigger this monitor.
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"]
}

166
cloud/azure/monitors.tf Normal file
View File

@ -0,0 +1,166 @@
# Root wiring for the Azure Datadog monitor sub-modules. Each module receives
# the shared environment/message/delay settings plus its service-specific
# thresholds, all exposed as root-level variables with a service prefix.
module "apimanagement" {
source = "./apimanagement"
environment = "${var.environment}"
message = "${var.message}"
delay = "${var.delay}"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
failed_requests_threshold_critical = "${var.apimanagement_failed_requests_threshold_critical}"
other_requests_threshold_critical = "${var.apimanagement_other_requests_threshold_critical}"
successful_requests_threshold_critical = "${var.apimanagement_successful_requests_threshold_critical}"
unauthorized_requests_threshold_critical = "${var.apimanagement_unauthorized_requests_threshold_critical}"
}
# Azure App Services: HTTP status-class rates, memory usage, response time.
module "appservices" {
source = "./app-services"
environment = "${var.environment}"
message = "${var.message}"
delay = "${var.delay}"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
http_2xx_requests_threshold_critical = "${var.appservices_http_2xx_requests_threshold_critical}"
http_2xx_requests_threshold_warning = "${var.appservices_http_2xx_requests_threshold_warning}"
http_5xx_requests_threshold_critical = "${var.appservices_http_5xx_requests_threshold_critical}"
http_5xx_requests_threshold_warning = "${var.appservices_http_5xx_requests_threshold_warning}"
http_4xx_requests_threshold_critical = "${var.appservices_http_4xx_requests_threshold_critical}"
http_4xx_requests_threshold_warning = "${var.appservices_http_4xx_requests_threshold_warning}"
memory_usage_threshold_critical = "${var.appservices_memory_usage_threshold_critical}"
memory_usage_threshold_warning = "${var.appservices_memory_usage_threshold_warning}"
response_time_threshold_critical = "${var.appservices_response_time_threshold_critical}"
response_time_threshold_warning = "${var.appservices_response_time_threshold_warning}"
}
# Azure Event Hub: error and failed-request rates.
# NOTE(review): the "thresold" spelling is a typo baked into the eventhub
# module's input variable names; renaming would break the module interface,
# so it is kept here for compatibility.
module "eventhub" {
source = "./eventhub"
environment = "${var.environment}"
message = "${var.message}"
delay = "${var.delay}"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
errors_rate_thresold_critical = "${var.eventhub_errors_rate_thresold_critical}"
errors_rate_thresold_warning = "${var.eventhub_errors_rate_thresold_warning}"
failed_requests_rate_thresold_critical = "${var.eventhub_failed_requests_rate_thresold_critical}"
failed_requests_rate_thresold_warning = "${var.eventhub_failed_requests_rate_thresold_warning}"
}
# Azure IoT Hub: twin read/update failure rates, job failures, telemetry
# egress/ingress anomalies.
# NOTE(review): unlike the other modules, iothub takes a raw "filter_tags"
# string (var.non_taggable_filter_tags) instead of the
# filter_tags_use_defaults/filter_tags_custom pair — presumably because the
# default tag convention cannot be applied to these resources; confirm.
module "iothub" {
source = "./iothubs"
environment = "${var.environment}"
message = "${var.message}"
delay = "${var.delay}"
filter_tags = "${var.non_taggable_filter_tags}"
dropped_d2c_telemetry_egress_threshold_critical = "${var.iothub_dropped_d2c_telemetry_egress_threshold_critical}"
dropped_d2c_telemetry_egress_threshold_warning = "${var.iothub_dropped_d2c_telemetry_egress_threshold_warning}"
failed_c2d_methods_rate_threshold_critical = "${var.iothub_failed_c2d_methods_rate_threshold_critical}"
failed_c2d_methods_rate_threshold_warning = "${var.iothub_failed_c2d_methods_rate_threshold_warning}"
failed_c2d_twin_read_rate_threshold_critical = "${var.iothub_failed_c2d_twin_read_rate_threshold_critical}"
failed_c2d_twin_read_rate_threshold_warning = "${var.iothub_failed_c2d_twin_read_rate_threshold_warning}"
failed_c2d_twin_update_rate_threshold_critical = "${var.iothub_failed_c2d_twin_update_rate_threshold_critical}"
failed_c2d_twin_update_rate_threshold_warning = "${var.iothub_failed_c2d_twin_update_rate_threshold_warning}"
failed_d2c_twin_read_rate_threshold_critical = "${var.iothub_failed_d2c_twin_read_rate_threshold_critical}"
failed_d2c_twin_read_rate_threshold_warning = "${var.iothub_failed_d2c_twin_read_rate_threshold_warning}"
failed_d2c_twin_update_rate_threshold_critical = "${var.iothub_failed_d2c_twin_update_rate_threshold_critical}"
failed_d2c_twin_update_rate_threshold_warning = "${var.iothub_failed_d2c_twin_update_rate_threshold_warning}"
failed_jobs_rate_threshold_critical = "${var.iothub_failed_jobs_rate_threshold_critical}"
failed_jobs_rate_threshold_warning = "${var.iothub_failed_jobs_rate_threshold_warning}"
failed_listjobs_rate_threshold_critical = "${var.iothub_failed_listjobs_rate_threshold_critical}"
failed_listjobs_rate_threshold_warning = "${var.iothub_failed_listjobs_rate_threshold_warning}"
failed_queryjobs_rate_threshold_critical = "${var.iothub_failed_queryjobs_rate_threshold_critical}"
failed_queryjobs_rate_threshold_warning = "${var.iothub_failed_queryjobs_rate_threshold_warning}"
fallback_d2c_telemetry_egress_threshold_critical = "${var.iothub_fallback_d2c_telemetry_egress_threshold_critical}"
fallback_d2c_telemetry_egress_threshold_warning = "${var.iothub_fallback_d2c_telemetry_egress_threshold_warning}"
invalid_d2c_telemetry_egress_threshold_critical = "${var.iothub_invalid_d2c_telemetry_egress_threshold_critical}"
invalid_d2c_telemetry_egress_threshold_warning = "${var.iothub_invalid_d2c_telemetry_egress_threshold_warning}"
orphaned_d2c_telemetry_egress_threshold_critical = "${var.iothub_orphaned_d2c_telemetry_egress_threshold_critical}"
orphaned_d2c_telemetry_egress_threshold_warning = "${var.iothub_orphaned_d2c_telemetry_egress_threshold_warning}"
}
# Azure Redis Cache: status, evicted keys, processor time, server load.
module "redis" {
source = "./redis"
environment = "${var.environment}"
message = "${var.message}"
delay = "${var.delay}"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
evictedkeys_limit_threshold_critical = "${var.redis_evictedkeys_limit_threshold_critical}"
evictedkeys_limit_threshold_warning = "${var.redis_evictedkeys_limit_threshold_warning}"
percent_processor_time_threshold_critical = "${var.redis_percent_processor_time_threshold_critical}"
percent_processor_time_threshold_warning = "${var.redis_percent_processor_time_threshold_warning}"
server_load_rate_threshold_critical = "${var.redis_server_load_rate_threshold_critical}"
server_load_rate_threshold_warning = "${var.redis_server_load_rate_threshold_warning}"
}
# Azure SQL Database: CPU, disk space, DTU consumption, deadlocks.
module "sqldatabase" {
source = "./sql-database"
environment = "${var.environment}"
message = "${var.message}"
delay = "${var.delay}"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
cpu_threshold_critical = "${var.sqldatabase_cpu_threshold_critical}"
cpu_threshold_warning = "${var.sqldatabase_cpu_threshold_warning}"
deadlock_threshold_critical = "${var.sqldatabase_deadlock_threshold_critical}"
diskspace_threshold_critical = "${var.sqldatabase_diskspace_threshold_critical}"
diskspace_threshold_warning = "${var.sqldatabase_diskspace_threshold_warning}"
dtu_threshold_critical = "${var.sqldatabase_dtu_threshold_critical}"
dtu_threshold_warning = "${var.sqldatabase_dtu_threshold_warning}"
}
# Azure Storage: availability, latency, and per-class error-request rates.
module "storage" {
source = "./storage"
environment = "${var.environment}"
message = "${var.message}"
delay = "${var.delay}"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
authorization_error_requests_threshold_critical = "${var.storage_authorization_error_requests_threshold_critical}"
availability_threshold_critical = "${var.storage_availability_threshold_critical}"
client_other_error_requests_threshold_critical = "${var.storage_client_other_error_requests_threshold_critical}"
latency_threshold_critical = "${var.storage_latency_threshold_critical}"
network_error_requests_threshold_critical = "${var.storage_network_error_requests_threshold_critical}"
server_other_error_requests_threshold_critical = "${var.storage_server_other_error_requests_threshold_critical}"
successful_requests_threshold_critical = "${var.storage_successful_requests_threshold_critical}"
throttling_error_requests_threshold_critical = "${var.storage_throttling_error_requests_threshold_critical}"
timeout_error_requests_threshold_critical = "${var.storage_timeout_error_requests_threshold_critical}"
}
# Azure Stream Analytics: conversion/runtime errors, failed function
# requests, streaming-unit utilization.
module "streamanalytics" {
source = "./stream-analytics"
environment = "${var.environment}"
message = "${var.message}"
delay = "${var.delay}"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
conversion_errors_threshold_critical = "${var.streamanalytics_conversion_errors_threshold_critical}"
conversion_errors_threshold_warning = "${var.streamanalytics_conversion_errors_threshold_warning}"
failed_function_requests_threshold_critical = "${var.streamanalytics_failed_function_requests_threshold_critical}"
function_requests_threshold_warning = "${var.streamanalytics_function_requests_threshold_warning}"
runtime_errors_threshold_critical = "${var.streamanalytics_runtime_errors_threshold_critical}"
runtime_errors_threshold_warning = "${var.streamanalytics_runtime_errors_threshold_warning}"
su_utilization_threshold_critical = "${var.streamanalytics_su_utilization_threshold_critical}"
su_utilization_threshold_warning = "${var.streamanalytics_su_utilization_threshold_warning}"
}

View File

@ -0,0 +1,47 @@
Azure Redis DataDog monitors
============================
How to use this module
----------------------
```
module "datadog-monitors-azure-redis" {
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/redis?ref={revision}"
message = "${module.datadog-message-alerting.alerting-message}"
environment = "${var.environment}"
}
```
Purpose
-------
Creates DataDog monitors with the following checks:
* Service status check
* Evicted keys count check
* Processor time (percent) threshold
* Server CPU load threshold
Inputs
------
| Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:|
| delay | Delay in seconds for the metric evaluation | string | `600` | no |
| environment | Architecture environment | string | - | yes |
| evictedkeys_limit_threshold_critical | Evicted keys limit (critical threshold) | string | `100` | no |
| evictedkeys_limit_threshold_warning | Evicted keys limit (warning threshold) | string | `0` | no |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| message | Message sent when a Redis monitor is triggered | string | - | yes |
| percent_processor_time_threshold_critical | Processor time percent (critical threshold) | string | `80` | no |
| percent_processor_time_threshold_warning | Processor time percent (warning threshold) | string | `60` | no |
| server_load_rate_threshold_critical | Server CPU load rate (critical threshold) | string | `90` | no |
| server_load_rate_threshold_warning | Server CPU load rate (warning threshold) | string | `70` | no |
Related documentation
---------------------
DataDog documentation: [https://docs.datadoghq.com/integrations/azure_redis_cache/](https://docs.datadoghq.com/integrations/azure_redis_cache/)
Azure Redis metrics documentation: [https://docs.microsoft.com/en-us/azure/redis-cache/cache-how-to-monitor](https://docs.microsoft.com/en-us/azure/redis-cache/cache-how-to-monitor)

View File

@ -0,0 +1,56 @@
# Input variables for the Azure Redis Datadog monitors sub-module.
# Global Terraform
variable "environment" {
description = "Architecture environment"
type = "string"
}
# Global DataDog
variable "message" {
description = "Message sent when a Redis monitor is triggered"
}
# Used for both evaluation_delay and new_host_delay in the monitors (seconds).
variable "delay" {
description = "Delay in seconds for the metric evaluation"
default = 600
}
# "true" selects the conventional dd_monitoring/dd_azure_redis/env tag filter;
# anything else makes the monitors use filter_tags_custom verbatim.
variable "filter_tags_use_defaults" {
description = "Use default filter tags convention"
default = "true"
}
variable "filter_tags_custom" {
description = "Tags used for custom filtering when filter_tags_use_defaults is false"
default = "*"
}
# Azure Redis specific
# NOTE(review): a warning default of 0 means any evicted key raises a
# warning — confirm this sensitivity is intended.
variable "evictedkeys_limit_threshold_warning" {
description = "Evicted keys limit (warning threshold)"
default = 0
}
variable "evictedkeys_limit_threshold_critical" {
description = "Evicted keys limit (critical threshold)"
default = 100
}
variable "percent_processor_time_threshold_critical" {
description = "Processor time percent (critical threshold)"
default = 80
}
variable "percent_processor_time_threshold_warning" {
description = "Processor time percent (warning threshold)"
default = 60
}
variable "server_load_rate_threshold_critical" {
description = "Server CPU load rate (critical threshold)"
default = 90
}
variable "server_load_rate_threshold_warning" {
description = "Server CPU load rate (warning threshold)"
default = 70
}

View File

@ -0,0 +1,124 @@
# Builds the Datadog tag filter shared by every Redis monitor query below.
# With filter_tags_use_defaults = "true" the conventional tags are used
# (dd_monitoring:enabled, dd_azure_redis:enabled, env:<environment>);
# otherwise the caller-supplied filter_tags_custom string is used verbatim.
data "template_file" "filter" {
  template = "$${filter}"

  vars {
    # Idiom fix: the false branch previously wrapped the variable in a
    # redundant nested interpolation ("${var.filter_tags_custom}"); a plain
    # variable reference evaluates identically in Terraform 0.11.
    filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_redis:enabled,env:%s", var.environment) : var.filter_tags_custom}"
  }
}
# Alerts when the Azure Redis status metric averages anything other than 1
# over the last 5 minutes, per resource_group/region/cache name.
resource "datadog_monitor" "status" {
name = "[${var.environment}] Redis {{name}} is down"
message = "${var.message}"
query = <<EOF
avg(last_5m):avg:azure.cache_redis.status{${data.template_file.filter.rendered}} by {resource_group,region,name} != 1
EOF
type = "metric alert"
# notify_no_data = true: a cache that stops reporting also triggers.
notify_no_data = true
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:redis", "team:azure", "provider:azure"]
}
# Alerts when the average number of evicted keys over the last 5 minutes
# exceeds the critical threshold, per resource_group/region/cache name.
resource "datadog_monitor" "evictedkeys" {
name = "[${var.environment}] Redis {{value}} evictedkeys on {{name}}"
message = "${var.message}"
query = <<EOF
avg(last_5m): (
avg:azure.cache_redis.evictedkeys{${data.template_file.filter.rendered}} by {resource_group,region,name}
) > ${var.evictedkeys_limit_threshold_critical}
EOF
type = "metric alert"
thresholds {
warning = "${var.evictedkeys_limit_threshold_warning}"
critical = "${var.evictedkeys_limit_threshold_critical}"
}
# Missing data does not trigger this monitor.
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:redis", "team:azure", "provider:azure"]
}
# Alerts when average processor time (percent) over the last 5 minutes
# exceeds the critical threshold, per resource_group/region/cache name.
resource "datadog_monitor" "percent_processor_time" {
name = "[${var.environment}] Redis processor time {{value}}% on {{name}}"
message = "${var.message}"
query = <<EOF
avg(last_5m): (
avg:azure.cache_redis.percent_processor_time{${data.template_file.filter.rendered}} by {resource_group,region,name}
) > ${var.percent_processor_time_threshold_critical}
EOF
type = "metric alert"
thresholds {
warning = "${var.percent_processor_time_threshold_warning}"
critical = "${var.percent_processor_time_threshold_critical}"
}
# Missing data does not trigger this monitor.
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:redis", "team:azure", "provider:azure"]
}
# Alerts when average Redis server load over the last 5 minutes exceeds the
# critical threshold, per resource_group/region/cache name.
resource "datadog_monitor" "server_load" {
name = "[${var.environment}] Redis processor server load {{value}}% on {{name}}"
message = "${var.message}"
query = <<EOF
avg(last_5m): (
avg:azure.cache_redis.server_load{${data.template_file.filter.rendered}} by {resource_group,region,name}
) > ${var.server_load_rate_threshold_critical}
EOF
type = "metric alert"
thresholds {
warning = "${var.server_load_rate_threshold_warning}"
critical = "${var.server_load_rate_threshold_critical}"
}
# Missing data does not trigger this monitor.
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:redis", "team:azure", "provider:azure"]
}

View File

@ -0,0 +1,49 @@
Azure SQL Database DataDog monitors
===================================
How to use this module
----------------------
```
module "datadog-monitors-azure-sql-database" {
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/sql-database?ref={revision}"
message = "${module.datadog-message-alerting.alerting-message}"
environment = "${var.environment}"
}
```
Purpose
-------
Creates DataDog monitors with the following checks:
* CPU High
* Free disk space low
* DTU Consumption high
* SQL deadlocks
Inputs
------
| Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:|
| cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no |
| cpu_threshold_warning | CPU usage in percent (warning threshold) | string | `80` | no |
| deadlock_threshold_critical | Amount of Deadlocks (critical threshold) | string | `1` | no |
| delay | Delay in seconds for the metric evaluation | string | `600` | no |
| diskspace_threshold_critical | Disk space used in percent (critical threshold) | string | `90` | no |
| diskspace_threshold_warning | Disk space used in percent (warning threshold) | string | `80` | no |
| dtu_threshold_critical | Amount of DTU used (critical threshold) | string | `90` | no |
| dtu_threshold_warning | Amount of DTU used (warning threshold) | string | `85` | no |
| environment | Architecture Environment | string | - | yes |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| message | Message sent when an alert is triggered | string | - | yes |
Related documentation
---------------------
DataDog documentation: [https://docs.datadoghq.com/integrations/azure_sql_database/](https://docs.datadoghq.com/integrations/azure_sql_database/)
Azure SQL Database metrics documentation: [https://docs.microsoft.com/en-us/azure/sql-database/saas-dbpertenant-log-analytics](https://docs.microsoft.com/en-us/azure/sql-database/saas-dbpertenant-log-analytics)

View File

@ -0,0 +1,62 @@
# Input variables for the Azure SQL Database Datadog monitors sub-module.
# Global Terraform
variable "environment" {
description = "Architecture Environment"
type = "string"
}
# Global DataDog
# Used for both evaluation_delay and new_host_delay in the monitors (seconds).
variable "delay" {
description = "Delay in seconds for the metric evaluation"
default = 600
}
variable "message" {
description = "Message sent when an alert is triggered"
}
# "true" selects the conventional dd_monitoring/dd_azure_sqldatabase/env tag
# filter; anything else makes the monitors use filter_tags_custom verbatim.
variable "filter_tags_use_defaults" {
description = "Use default filter tags convention"
default = "true"
}
variable "filter_tags_custom" {
description = "Tags used for custom filtering when filter_tags_use_defaults is false"
default = "*"
}
# Azure SQL Database specific
# NOTE(review): defaults here are quoted strings while the redis module uses
# bare numbers — equivalent in Terraform 0.11, but inconsistent across
# modules.
variable "cpu_threshold_warning" {
description = "CPU usage in percent (warning threshold)"
default = "80"
}
variable "cpu_threshold_critical" {
description = "CPU usage in percent (critical threshold)"
default = "90"
}
variable "diskspace_threshold_warning" {
description = "Disk space used in percent (warning threshold)"
default = "80"
}
variable "diskspace_threshold_critical" {
description = "Disk space used in percent (critical threshold)"
default = "90"
}
variable "dtu_threshold_warning" {
description = "Amount of DTU used (warning threshold)"
default = "85"
}
variable "dtu_threshold_critical" {
description = "Amount of DTU used (critical threshold)"
default = "90"
}
variable "deadlock_threshold_critical" {
description = "Amount of Deadlocks (critical threshold)"
default = "1"
}

View File

@ -0,0 +1,129 @@
# Builds the Datadog tag filter shared by every SQL Database monitor query
# below. With filter_tags_use_defaults = "true" the conventional tags are
# used (dd_monitoring:enabled, dd_azure_sqldatabase:enabled,
# env:<environment>); otherwise filter_tags_custom is used verbatim.
data "template_file" "filter" {
  template = "$${filter}"

  vars {
    # Idiom fix: the false branch previously wrapped the variable in a
    # redundant nested interpolation ("${var.filter_tags_custom}"); a plain
    # variable reference evaluates identically in Terraform 0.11.
    filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_sqldatabase:enabled,env:%s", var.environment) : var.filter_tags_custom}"
  }
}
# Alerts when average SQL Database CPU usage over the last 15 minutes exceeds
# the critical threshold, per resource_group/region/database name.
resource "datadog_monitor" "sql-database_cpu_90_15min" {
name = "[${var.environment}] SQL Database CPU high > ${var.cpu_threshold_critical}% on {{name}}"
message = "${var.message}"
query = <<EOF
avg(last_15m): (
avg:azure.sql_servers_databases.cpu_percent{${data.template_file.filter.rendered}} by {resource_group,region,name}
) > ${var.cpu_threshold_critical}
EOF
type = "metric alert"
thresholds {
# Fix: var.cpu_threshold_warning was declared in inputs but never wired in;
# every other monitor in this module sets its warning threshold.
warning = "${var.cpu_threshold_warning}"
critical = "${var.cpu_threshold_critical}"
}
# notify_no_data = true: a database that stops reporting CPU also triggers.
notify_no_data = true
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:sqldatabase", "team:azure", "provider:azure"]
}
# Alerts when average SQL Database storage usage (percent used) over the last
# 15 minutes exceeds the critical threshold, per
# resource_group/region/database name.
resource "datadog_monitor" "sql-database_free_space_low" {
# Fix: the title previously read "free space < ${threshold}%" while the
# query alerts on storage_percent (space USED) being above the threshold —
# with the default of 90 that would actually mean free space < 10%. The
# title now states what is measured.
name = "[${var.environment}] SQL Database storage used > ${var.diskspace_threshold_critical}% on {{name}}"
message = "${var.message}"
type = "metric alert"
query = <<EOF
avg(last_15m): (
avg:azure.sql_servers_databases.storage_percent{${data.template_file.filter.rendered}} by {resource_group,region,name}
) > ${var.diskspace_threshold_critical}
EOF
thresholds {
warning = "${var.diskspace_threshold_warning}"
critical = "${var.diskspace_threshold_critical}"
}
# notify_no_data = true: a database that stops reporting storage also triggers.
notify_no_data = true
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:sqldatabase", "team:azure", "provider:azure"]
}
# Alerts when average DTU consumption (percent) over the last 15 minutes
# exceeds the critical threshold, per resource_group/region/database name.
resource "datadog_monitor" "sql-database_dtu_consumption_high" {
name = "[${var.environment}] SQL Database DTU Consumption on {{name}} > ${var.dtu_threshold_critical}"
message = "${var.message}"
type = "metric alert"
# Fix: the metric was queried without a space aggregator; Datadog metric
# query syntax requires one (e.g. avg:), and every other query in this
# module uses avg:.
query = <<EOF
avg(last_15m): (
avg:azure.sql_servers_databases.dtu_consumption_percent{${data.template_file.filter.rendered}} by {resource_group,region,name}
) > ${var.dtu_threshold_critical}
EOF
thresholds {
warning = "${var.dtu_threshold_warning}"
critical = "${var.dtu_threshold_critical}"
}
# notify_no_data = true: a database that stops reporting DTU also triggers.
notify_no_data = true
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:sqldatabase", "team:azure", "provider:azure"]
}
# Alerts when the number of deadlocks counted over the last 5 minutes exceeds
# the critical threshold, per resource_group/region/database name.
resource "datadog_monitor" "sql-database_deadlocks_count" {
name = "[${var.environment}] SQL Database Deadlocks too high on {{name}}"
message = "${var.message}"
type = "metric alert"
query = <<EOF
sum(last_5m): (
avg:azure.sql_servers_databases.deadlock{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
) > ${var.deadlock_threshold_critical}
EOF
thresholds {
critical = "${var.deadlock_threshold_critical}"
}
# Missing data does not trigger: no deadlocks means no deadlock metric.
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
tags = ["env:${var.environment}", "resource:sqldatabase", "team:azure", "provider:azure"]
}

View File

@ -32,20 +32,20 @@ Inputs
| Name | Description | Type | Default | Required | | Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:| |------|-------------|:----:|:-----:|:-----:|
| authorization_error_requests_threshold_critical | Maximum acceptable percent of authorization error requests for a storage | string | `15` | no |
| availability_threshold_critical | Minimum acceptable percent of availability for a storage | string | `90` | no |
| client_other_error_requests_threshold_critical | Maximum acceptable percent of client other error requests for a storage | string | `15` | no |
| delay | Delay in seconds for the metric evaluation | string | `600` | no | | delay | Delay in seconds for the metric evaluation | string | `600` | no |
| environment | Architecture environment | string | - | yes | | environment | Architecture environment | string | - | yes |
| message | Message sent when a monitor is triggered | string | - | yes |
| filter_tags_use_defaults | Use default tagging convention | string | `true` | no |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| availability_threshold_critical | Minimum threshold of availability | string | `90` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| successful_requests_threshold_critical | Minimum threshold of successful requests | string | `90` | no | | latency_threshold_critical | Maximum acceptable end to end latency (ms) for a storage | string | `1000` | no |
| latency_threshold_critical | Maximum threshold of latency in ms | string | `1000` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes |
| timeout_error_requests_threshold_critical | Maximum threshold of timeout error requests in percent | string | `35` | no | | network_error_requests_threshold_critical | Maximum acceptable percent of network error requests for a storage | string | `5` | no |
| network_error_requests_threshold_critical | Maximum threshold of network error requests in percent | string | `35` | no | | server_other_error_requests_threshold_critical | Maximum acceptable percent of server other error requests for a storage | string | `10` | no |
| throttling_error_requests_threshold_critical | Maximum threshold of throttling error requests in percent | string | `50` | no | | successful_requests_threshold_critical | Minimum acceptable percent of successful requests for a storage | string | `90` | no |
| server_other_error_requests_threshold_critical | Maximum threshold of server other error requests in percent | string | `50` | no | | throttling_error_requests_threshold_critical | Maximum acceptable percent of throttling error requests for a storage | string | `10` | no |
| client_other_error_requests_threshold_critical | Maximum threshold of client other error requests in percent | string | `75` | no | | timeout_error_requests_threshold_critical | Maximum acceptable percent of timeout error requests for a storage | string | `5` | no |
| authorization_error_requests_threshold_critical | Maximum threshold of authorization error requests in percent | string | `75` | no |
Related documentation Related documentation
--------------------- ---------------------

View File

@ -0,0 +1,39 @@
Azure Stream Analytics DataDog monitors
=======================================
How to use this module
----------------------
```
module "datadog-monitors-azure-stream-analytics" {
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/stream-analytics?ref={revision}"
message = "${module.datadog-message-alerting.alerting-message}"
environment = "${var.environment}"
subscription_id = "${var.subscription_id}"
}
```
Inputs
------
| Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:|
| conversion_errors_threshold_critical | Conversion errors limit (critical threshold) | string | `10` | no |
| conversion_errors_threshold_warning | Conversion errors limit (warning threshold) | string | `0` | no |
| delay | Delay in seconds for the metric evaluation | string | `600` | no |
| environment | Architecture environment | string | - | yes |
| failed_function_requests_threshold_critical | Failed Function Request rate limit (critical threshold) | string | `10` | no |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| function_requests_threshold_warning | Failed Function Request rate limit (warning threshold) | string | `0` | no |
| message | Message sent when a Stream Analytics monitor is triggered | string | - | yes |
| runtime_errors_threshold_critical | Runtime errors limit (critical threshold) | string | `10` | no |
| runtime_errors_threshold_warning | Runtime errors limit (warning threshold) | string | `0` | no |
| su_utilization_threshold_critical | Streaming Unit utilization rate limit (critical threshold) | string | `80` | no |
| su_utilization_threshold_warning | Streaming Unit utilization rate limit (warning threshold) | string | `60` | no |
Related documentation
---------------------
DataDog documentation: [https://docs.datadoghq.com/integrations/azure/](https://docs.datadoghq.com/integrations/azure/)

View File

@ -0,0 +1,66 @@
# Global Terraform
variable "environment" {
  # Environment name (e.g. prod, staging); interpolated into monitor names,
  # the default tag filter, and the monitors' tags.
  description = "Architecture environment"
  type = "string"
}
# Global DataDog
variable "message" {
  # Notification/alerting template shared by every monitor in this module.
  # Description previously said "Redis monitor" — copy-paste from the Redis
  # module; this module creates Stream Analytics monitors.
  description = "Message sent when a Stream Analytics monitor is triggered"
}
variable "delay" {
  # Used for both evaluation_delay and new_host_delay on every monitor,
  # so evaluation waits for late-arriving Azure metrics.
  description = "Delay in seconds for the metric evaluation"
  default = 600
}
variable "filter_tags_use_defaults" {
  # When "true", queries are scoped with the default convention tags
  # (dd_monitoring:enabled,dd_azure_streamanalytics:enabled,env:<environment>);
  # otherwise filter_tags_custom is used verbatim. String, not bool (TF 0.11 style).
  description = "Use default filter tags convention"
  default = "true"
}
variable "filter_tags_custom" {
  # Raw Datadog tag filter applied when filter_tags_use_defaults is not "true";
  # the default "*" matches everything.
  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
  default = "*"
}
# Azure Stream Analytics specific
variable "su_utilization_threshold_warning" {
  # Percent of allocated Streaming Units in use before a warning fires.
  description = "Streaming Unit utilization rate limit (warning threshold)"
  default = 60
}

variable "su_utilization_threshold_critical" {
  # Percent of allocated Streaming Units in use before a critical alert fires.
  description = "Streaming Unit utilization rate limit (critical threshold)"
  default = 80
}

variable "function_requests_threshold_warning" {
  # Warning threshold for the failed-function-request percentage.
  # NOTE(review): name lacks the "failed_" prefix used by its critical
  # counterpart below — renaming would break existing callers, kept as-is.
  description = "Failed Function Request rate limit (warning threshold)"
  default = 0
}

variable "failed_function_requests_threshold_critical" {
  # Percent of AML callout requests that failed before a critical alert fires.
  description = "Failed Function Request rate limit (critical threshold)"
  default = 10
}

variable "conversion_errors_threshold_warning" {
  # Average conversion errors over 5 minutes before a warning fires.
  description = "Conversion errors limit (warning threshold)"
  default = 0
}

variable "conversion_errors_threshold_critical" {
  # Average conversion errors over 5 minutes before a critical alert fires.
  description = "Conversion errors limit (critical threshold)"
  default = 10
}

variable "runtime_errors_threshold_warning" {
  # Average runtime errors over 5 minutes before a warning fires.
  description = "Runtime errors limit (warning threshold)"
  default = 0
}

variable "runtime_errors_threshold_critical" {
  # Average runtime errors over 5 minutes before a critical alert fires.
  description = "Runtime errors limit (critical threshold)"
  default = 10
}

View File

@ -0,0 +1,147 @@
# Renders the Datadog tag filter used inside every monitor query below:
# either the default convention tags (dd_monitoring / dd_azure_streamanalytics /
# env:<environment>) or the caller-supplied filter_tags_custom.
data "template_file" "filter" {
  # $$ escapes the interpolation so template_file substitutes `filter` itself.
  template = "$${filter}"

  vars {
    filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_streamanalytics:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
  }
}
# Alerts when a Stream Analytics job's status metric drops below 1.
# This is the only monitor in the module with notify_no_data enabled:
# a job that stops reporting status at all is also alerted on.
resource "datadog_monitor" "status" {
  type    = "metric alert"
  name    = "[${var.environment}] Stream Analytics Status is not ok on {{name}}"
  message = "${var.message}"

  query = <<EOF
avg(last_5m):avg:azure.streamanalytics_streamingjobs.status{${data.template_file.filter.rendered}} by {resource_group,region,name} < 1
EOF

  evaluation_delay    = "${var.delay}"
  new_host_delay      = "${var.delay}"
  notify_no_data      = true
  no_data_timeframe   = 20
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true

  tags = ["env:${var.environment}", "resource:streamanalytics", "team:azure", "provider:azure"]
}
# Warns/alerts when a job's Streaming Unit utilization (percent) averaged over
# 5 minutes exceeds the configured thresholds.
resource "datadog_monitor" "su_utilization" {
  name    = "[${var.environment}] Stream Analytics streaming Units utilization at more than ${var.su_utilization_threshold_critical}% on {{name}}"
  message = "${var.message}"

  query = <<EOF
avg(last_5m): (
avg:azure.streamanalytics_streamingjobs.resource_utilization{${data.template_file.filter.rendered}} by {resource_group,region,name}
) > ${var.su_utilization_threshold_critical}
EOF

  type                = "metric alert"
  notify_no_data      = false
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  # Critical must match the comparison value used in `query`.
  thresholds {
    warning  = "${var.su_utilization_threshold_warning}"
    critical = "${var.su_utilization_threshold_critical}"
  }

  tags = ["env:${var.environment}", "resource:streamanalytics", "team:azure", "provider:azure"]
}
# Alerts when the percentage of failed AML (Azure Machine Learning) callout
# requests exceeds the configured thresholds.
# NOTE(review): when there are no callout requests the denominator is zero —
# presumably the query then yields no data rather than alerting; confirm.
resource "datadog_monitor" "failed_function_requests" {
  name    = "[${var.environment}] Stream Analytics more than ${var.failed_function_requests_threshold_critical} failed function requests on {{name}}"
  message = "${var.message}"

  # failed callout requests / total callout requests, as a percentage.
  query = <<EOF
sum(last_5m): (
avg:azure.streamanalytics_streamingjobs.aml_callout_failed_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.streamanalytics_streamingjobs.aml_callout_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
) * 100 > ${var.failed_function_requests_threshold_critical}
EOF

  type             = "metric alert"
  notify_no_data   = false
  evaluation_delay = "${var.delay}"

  # NOTE(review): 60 here while the sibling monitors use 0 — confirm the
  # re-notification every hour is intentional for this monitor only.
  renotify_interval = 60

  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  thresholds {
    warning  = "${var.function_requests_threshold_warning}"
    critical = "${var.failed_function_requests_threshold_critical}"
  }

  tags = ["env:${var.environment}", "resource:streamanalytics", "team:azure", "provider:azure"]
}
# Alerts when the 5-minute average of conversion errors (events that could not
# be converted to the expected output schema) exceeds the configured thresholds.
resource "datadog_monitor" "conversion_errors" {
  name    = "[${var.environment}] Stream Analytics more than ${var.conversion_errors_threshold_critical} conversion errors on {{name}}"
  message = "${var.message}"

  query = <<EOF
avg(last_5m): (
avg:azure.streamanalytics_streamingjobs.conversion_errors{${data.template_file.filter.rendered}} by {resource_group,region,name}
) > ${var.conversion_errors_threshold_critical}
EOF

  type                = "metric alert"
  notify_no_data      = false
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  # Critical must match the comparison value used in `query`.
  thresholds {
    warning  = "${var.conversion_errors_threshold_warning}"
    critical = "${var.conversion_errors_threshold_critical}"
  }

  tags = ["env:${var.environment}", "resource:streamanalytics", "team:azure", "provider:azure"]
}
# Alerts when the 5-minute average of runtime errors on a Stream Analytics job
# exceeds the configured critical limit; a lower warning threshold fires first.
resource "datadog_monitor" "runtime_errors" {
  type    = "metric alert"
  name    = "[${var.environment}] Stream Analytics more than ${var.runtime_errors_threshold_critical} runtime errors on {{name}}"
  message = "${var.message}"

  query = <<EOF
avg(last_5m): (
avg:azure.streamanalytics_streamingjobs.errors{${data.template_file.filter.rendered}} by {resource_group,region,name}
) > ${var.runtime_errors_threshold_critical}
EOF

  thresholds {
    warning  = "${var.runtime_errors_threshold_warning}"
    critical = "${var.runtime_errors_threshold_critical}"
  }

  evaluation_delay    = "${var.delay}"
  new_host_delay      = "${var.delay}"
  notify_no_data      = false
  no_data_timeframe   = 20
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true

  tags = ["env:${var.environment}", "resource:streamanalytics", "team:azure", "provider:azure"]
}