From daabb7244af225ccffc2580fbb2b441586163bba Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Mon, 30 Oct 2017 14:30:05 +0100 Subject: [PATCH 01/14] MON-80 Add inputs and monitors files --- cloud/azure/iothubs/inputs.tf | 36 ++++++++++ cloud/azure/iothubs/monitors-iothubs.tf | 90 +++++++++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 cloud/azure/iothubs/inputs.tf create mode 100644 cloud/azure/iothubs/monitors-iothubs.tf diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf new file mode 100644 index 0000000..ddc3456 --- /dev/null +++ b/cloud/azure/iothubs/inputs.tf @@ -0,0 +1,36 @@ +variable "hno_escalation_group" {} + +variable "ho_escalation_group" {} + +variable "environment" {} + +variable "subscription_id" {} + +## IOT hubs +variable "delay" { + default = 600 +} + +variable "warning_jobs_failed" { + default = 5 +} + +variable "critical_jobs_failed" { + default = 10 +} + +variable "warning_listjobs_failed" { + default = 5 +} + +variable "critical_listjobs_failed" { + default = 10 +} + +variable "warning_queryjobs_failed" { + default = 5 +} + +variable "critical_queryjobs_failed" { + default = 10 +} \ No newline at end of file diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf new file mode 100644 index 0000000..5f584db --- /dev/null +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -0,0 +1,90 @@ +resource "datadog_monitor" "too_many_jobs_failed" { + name = "[${var.environment}] Too many jobs failed on {{name}} " + message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}" + + query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.failed{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.jobs.failed{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.jobs.completed{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.critical_jobs_failed}" + type = 
"query alert" + + thresholds { + warning = "${var.warning_jobs_failed}" + critical = "${var.critical_jobs_failed}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 60 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "too_many_list_jobs_failed" { + name = "[${var.environment}] Too many list_jobs failure on {{name}} " + message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}" + + query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.list_jobs.failure{*} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.list_jobs.success{*} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.list_jobs.failure{*} by {resource_group,name}.as_count() ) ) * 100 > ${var.critical_listjobs_failed}" + type = "query alert" + + thresholds { + warning = "${var.warning_listjobs_failed}" + critical = "${var.critical_listjobs_failed}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 60 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "too_many_query_jobs_failed" { + name = "[${var.environment}] Too many query_jobs failed on {{name}} " + message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}" + + query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.query_jobs.failure{*} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.query_jobs.success{*} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.query_jobs.failure{*} by {resource_group,name}.as_count() ) ) * 100 > 
${var.critical_queryjobs_failed}" + type = "query alert" + + thresholds { + warning = "${var.warning_queryjobs_failed}" + critical = "${var.critical_queryjobs_failed}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 60 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "status" { + name = "[${var.environment}] Status is not ok on {{name}} " + message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}" + + query = "avg(last_5m):avg:azure.devices_iothubs.status{*} by {name,resource_group} < 1" + type = "query alert" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 60 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} \ No newline at end of file From 7f0a0e91cf6fdd3cb6ea5d33abcbf2dbdb41c0a2 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Mon, 30 Oct 2017 15:21:40 +0100 Subject: [PATCH 02/14] MON-80 Rename variable for message alerting --- cloud/azure/iothubs/inputs.tf | 8 +++++--- cloud/azure/iothubs/monitors-iothubs.tf | 8 ++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index ddc3456..5de7dab 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -1,10 +1,12 @@ -variable "hno_escalation_group" {} +variable "critical_escalation_group" {} -variable "ho_escalation_group" {} +variable "warning_escalation_group" {} variable "environment" {} -variable "subscription_id" {} +variable "stack" {} + +variable "client_name" {} ## IOT hubs variable "delay" { diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf 
index 5f584db..e333808 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -1,6 +1,6 @@ resource "datadog_monitor" "too_many_jobs_failed" { name = "[${var.environment}] Too many jobs failed on {{name}} " - message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}" + message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}" query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.failed{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.jobs.failed{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.jobs.completed{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.critical_jobs_failed}" type = "query alert" @@ -24,7 +24,7 @@ resource "datadog_monitor" "too_many_jobs_failed" { resource "datadog_monitor" "too_many_list_jobs_failed" { name = "[${var.environment}] Too many list_jobs failure on {{name}} " - message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}" + message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}" query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.list_jobs.failure{*} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.list_jobs.success{*} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.list_jobs.failure{*} by {resource_group,name}.as_count() ) ) * 100 > ${var.critical_listjobs_failed}" type = "query alert" @@ -48,7 +48,7 @@ resource "datadog_monitor" "too_many_list_jobs_failed" { resource "datadog_monitor" "too_many_query_jobs_failed" { name = "[${var.environment}] Too many query_jobs failed on {{name}} " - message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} 
\n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}" + message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}" query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.query_jobs.failure{*} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.query_jobs.success{*} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.query_jobs.failure{*} by {resource_group,name}.as_count() ) ) * 100 > ${var.critical_queryjobs_failed}" type = "query alert" @@ -72,7 +72,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { resource "datadog_monitor" "status" { name = "[${var.environment}] Status is not ok on {{name}} " - message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}" + message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}" query = "avg(last_5m):avg:azure.devices_iothubs.status{*} by {name,resource_group} < 1" type = "query alert" From 4c474be541eb5f3a3f14bf2a8bd7716803651ecf Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Mon, 30 Oct 2017 16:42:58 +0100 Subject: [PATCH 03/14] MON-80 Add monitors and update variables --- cloud/azure/iothubs/inputs.tf | 44 +++++++++++----- cloud/azure/iothubs/monitors-iothubs.tf | 69 ++++++++++++++++++++----- 2 files changed, 86 insertions(+), 27 deletions(-) diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index 5de7dab..38b1b44 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -1,38 +1,54 @@ -variable "critical_escalation_group" {} - -variable "warning_escalation_group" {} - variable "environment" {} variable "stack" {} variable "client_name" {} -## IOT hubs variable "delay" { default = 600 } -variable "warning_jobs_failed" { - default = 5 +## IOT hubs +variable 
"jobs_failed_threshold_warning" { + default = 0 } -variable "critical_jobs_failed" { +variable "jobs_failed_threshold_critical" { default = 10 } -variable "warning_listjobs_failed" { - default = 5 +variable "jobs_failed_message" {} + +variable "listjobs_failed_threshold_warning" { + default = 0 } -variable "critical_listjobs_failed" { +variable "listjobs_failed_threshold_critical" { default = 10 } -variable "warning_queryjobs_failed" { - default = 5 +variable "listjobs_failed_message" {} + +variable "queryjobs_failed_threshold_warning" { + default = 0 } -variable "critical_queryjobs_failed" { +variable "queryjobs_failed_threshold_critical" { + default = 10 +} + +variable "queryjobs_failed_message" {} + +variable "status_message" {} + +variable "total_devices_message" {} + +variable "c2d_methods_failed_message" {} + +variable "c2d_methods_failed_threshold_warning" { + default = 0 +} + +variable "c2d_methods_failed_threshold_critical" { default = 10 } \ No newline at end of file diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index e333808..12f3d9a 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -1,13 +1,13 @@ resource "datadog_monitor" "too_many_jobs_failed" { name = "[${var.environment}] Too many jobs failed on {{name}} " - message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}" + message = "${var.jobs_failed_message}" - query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.failed{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.jobs.failed{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.jobs.completed{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.critical_jobs_failed}" + query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.failed{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.jobs.failed{*} by 
{name,resource_group}.as_count() + avg:azure.devices_iothubs.jobs.completed{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.jobs_failed_threshold_critical}" type = "query alert" thresholds { - warning = "${var.warning_jobs_failed}" - critical = "${var.critical_jobs_failed}" + warning = "${var.jobs_failed_threshold_warning}" + critical = "${var.jobs_failed_threshold_critical}" } notify_no_data = false @@ -24,14 +24,14 @@ resource "datadog_monitor" "too_many_jobs_failed" { resource "datadog_monitor" "too_many_list_jobs_failed" { name = "[${var.environment}] Too many list_jobs failure on {{name}} " - message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}" + message = "${var.listjobs_failed_message}" - query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.list_jobs.failure{*} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.list_jobs.success{*} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.list_jobs.failure{*} by {resource_group,name}.as_count() ) ) * 100 > ${var.critical_listjobs_failed}" + query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.list_jobs.failure{*} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.list_jobs.success{*} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.list_jobs.failure{*} by {resource_group,name}.as_count() ) ) * 100 > ${var.listjobs_failed_threshold_critical}" type = "query alert" thresholds { - warning = "${var.warning_listjobs_failed}" - critical = "${var.critical_listjobs_failed}" + warning = "${var.listjobs_failed_threshold_warning}" + critical = "${var.listjobs_failed_threshold_critical}" } notify_no_data = false @@ -48,14 +48,14 @@ resource "datadog_monitor" "too_many_list_jobs_failed" { resource "datadog_monitor" "too_many_query_jobs_failed" { name = "[${var.environment}] Too many query_jobs failed on {{name}} " - message = 
"{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}" + message = "${var.queryjobs_failed_message}" - query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.query_jobs.failure{*} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.query_jobs.success{*} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.query_jobs.failure{*} by {resource_group,name}.as_count() ) ) * 100 > ${var.critical_queryjobs_failed}" + query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.query_jobs.failure{*} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.query_jobs.success{*} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.query_jobs.failure{*} by {resource_group,name}.as_count() ) ) * 100 > ${var.queryjobs_failed_threshold_critical}" type = "query alert" thresholds { - warning = "${var.warning_queryjobs_failed}" - critical = "${var.critical_queryjobs_failed}" + warning = "${var.queryjobs_failed_threshold_warning}" + critical = "${var.queryjobs_failed_threshold_critical}" } notify_no_data = false @@ -72,11 +72,54 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { resource "datadog_monitor" "status" { name = "[${var.environment}] Status is not ok on {{name}} " - message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}" + message = "${var.status_message}" query = "avg(last_5m):avg:azure.devices_iothubs.status{*} by {name,resource_group} < 1" type = "query alert" + notify_no_data = true + evaluation_delay = "${var.delay}" + renotify_interval = 60 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "total_devices" { + name = "[${var.environment}] Total devices is wrong on {{name}} " + message
= "${var.total_devices_message}" + + query = "avg(last_5m):avg:azure.devices_iothubs.devices.total_devices{*} by {name,resource_group} == 0" + type = "query alert" + + notify_no_data = true + evaluation_delay = "${var.delay}" + renotify_interval = 60 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "too_many_c2d_methods_failed" { + name = "[${var.environment}] Too many c2d methods failure on {{name}} " + message = "${var.c2d_methods_failed_message}" + + query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.methods.failure{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.methods.failure{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.methods.success{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_methods_failed_threshold_critical}" + type = "query alert" + + thresholds { + warning = "${var.c2d_methods_failed_threshold_warning}" + critical = "${var.c2d_methods_failed_threshold_critical}" + } + notify_no_data = false evaluation_delay = "${var.delay}" renotify_interval = 60 From effaaf0e12d6446510700e4bc71765c8a0b37441 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Mon, 30 Oct 2017 17:13:42 +0100 Subject: [PATCH 04/14] MON-80 Add c2d and d2c monitors --- cloud/azure/iothubs/inputs.tf | 46 +++++++++++- cloud/azure/iothubs/monitors-iothubs.tf | 98 ++++++++++++++++++++++++- 2 files changed, 140 insertions(+), 4 deletions(-) diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index 38b1b44..093b3a3 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -43,12 +43,52 @@ variable "status_message" {} variable "total_devices_message" {} -variable "c2d_methods_failed_message" {} - variable "c2d_methods_failed_threshold_warning" { default = 0 } variable "c2d_methods_failed_threshold_critical" { default = 10 -} \ No 
newline at end of file +} + +variable "c2d_methods_failed_message" {} + +variable "c2d_twin_read_failed_threshold_warning" { + default = 0 +} + +variable "c2d_twin_read_failed_threshold_critical" { + default = 10 +} + +variable "c2d_twin_read_failed_message" {} + +variable "c2d_twin_update_failed_threshold_warning" { + default = 0 +} + +variable "c2d_twin_update_failed_threshold_critical" { + default = 10 +} + +variable "c2d_twin_update_failed_message" {} + +variable "d2c_twin_read_failed_threshold_warning" { + default = 0 +} + +variable "d2c_twin_read_failed_threshold_critical" { + default = 10 +} + +variable "d2c_twin_read_failed_message" {} + +variable "d2c_twin_update_failed_threshold_warning" { + default = 0 +} + +variable "d2c_twin_update_failed_threshold_critical" { + default = 10 +} + +variable "d2c_twin_update_failed_message" {} \ No newline at end of file diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index 12f3d9a..8d44dde 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -130,4 +130,100 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 -} \ No newline at end of file +} + +resource "datadog_monitor" "too_many_c2d_twin_read_failed" { + name = "[${var.environment}] Too many c2d twin read failure on {{name}} " + message = "${var.c2d_twin_read_failed_message}" + + query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.twin.read.failure{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.twin.read.failure{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.twin.read.success{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_twin_read_failed_threshold_critical}" + type = "query alert" + + thresholds { + warning = "${var.c2d_twin_read_failed_threshold_warning}" + critical = "${var.c2d_twin_read_failed_threshold_critical}" 
+ } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 60 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "too_many_c2d_twin_update_failed" { + name = "[${var.environment}] Too many c2d twin update failure on {{name}} " + message = "${var.c2d_twin_update_failed_message}" + + query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.twin.update.failure{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.twin.update.failure{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.twin.update.success{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_twin_update_failed_threshold_critical}" + type = "query alert" + + thresholds { + warning = "${var.c2d_twin_update_failed_threshold_warning}" + critical = "${var.c2d_twin_update_failed_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 60 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "too_many_d2c_twin_read_failed" { + name = "[${var.environment}] Too many d2c twin read failure on {{name}} " + message = "${var.d2c_twin_read_failed_message}" + + query = "avg(last_5m):( avg:azure.devices_iothubs.d2c.twin.read.failure{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.d2c.twin.read.failure{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.d2c.twin.read.success{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.d2c_twin_read_failed_threshold_critical}" + type = "query alert" + + thresholds { + warning = "${var.d2c_twin_read_failed_threshold_warning}" + critical = "${var.d2c_twin_read_failed_threshold_critical}" + } + + notify_no_data = false + 
evaluation_delay = "${var.delay}" + renotify_interval = 60 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "too_many_d2c_twin_update_failed" { + name = "[${var.environment}] Too many d2c twin update failure on {{name}} " + message = "${var.d2c_twin_update_failed_message}" + + query = "avg(last_5m):( avg:azure.devices_iothubs.d2c.twin.update.failure{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.d2c.twin.update.failure{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.d2c.twin.update.success{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.d2c_twin_update_failed_threshold_critical}" + type = "query alert" + + thresholds { + warning = "${var.d2c_twin_update_failed_threshold_warning}" + critical = "${var.d2c_twin_update_failed_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 60 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} From 5136dd5c4d1e5b3bea7685b95f8361d21a02dc34 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Mon, 30 Oct 2017 17:30:16 +0100 Subject: [PATCH 05/14] MON-80 Add subscription_id --- cloud/azure/iothubs/inputs.tf | 2 ++ cloud/azure/iothubs/monitors-iothubs.tf | 40 ++++++++++++------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index 093b3a3..e705d8f 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -4,6 +4,8 @@ variable "stack" {} variable "client_name" {} +variable "subscription_id" {} + variable "delay" { default = 600 } diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index 8d44dde..a4ec018 100644 --- 
a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -2,7 +2,7 @@ resource "datadog_monitor" "too_many_jobs_failed" { name = "[${var.environment}] Too many jobs failed on {{name}} " message = "${var.jobs_failed_message}" - query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.failed{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.jobs.failed{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.jobs.completed{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.jobs_failed_threshold_critical}" + query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.failed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.jobs.failed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.jobs.completed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.jobs_failed_threshold_critical}" type = "query alert" thresholds { @@ -12,7 +12,7 @@ resource "datadog_monitor" "too_many_jobs_failed" { notify_no_data = false evaluation_delay = "${var.delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true @@ -26,7 +26,7 @@ resource "datadog_monitor" "too_many_list_jobs_failed" { name = "[${var.environment}] Too many list_jobs failure on {{name}} " message = "${var.listjobs_failed_message}" - query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.list_jobs.failure{*} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.list_jobs.success{*} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.list_jobs.failure{*} by {resource_group,name}.as_count() ) ) * 100 > ${var.listjobs_failed_threshold_critical}" + query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.list_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() / ( 
avg:azure.devices_iothubs.jobs.list_jobs.success{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.list_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() ) ) * 100 > ${var.listjobs_failed_threshold_critical}" type = "query alert" thresholds { @@ -36,7 +36,7 @@ resource "datadog_monitor" "too_many_list_jobs_failed" { notify_no_data = false evaluation_delay = "${var.delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true @@ -50,7 +50,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { name = "[${var.environment}] Too many query_jobs failed on {{name}} " message = "${var.queryjobs_failed_message}" - query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.query_jobs.failure{*} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.query_jobs.success{*} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.query_jobs.failure{*} by {resource_group,name}.as_count() ) ) * 100 > ${var.queryjobs_failed_threshold_critical}" + query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.query_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.query_jobs.success{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.query_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() ) ) * 100 > ${var.queryjobs_failed_threshold_critical}" type = "query alert" thresholds { @@ -60,7 +60,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { notify_no_data = false evaluation_delay = "${var.delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true @@ -74,12 +74,12 @@ resource "datadog_monitor" "status" { name = "[${var.environment}] Status is not ok on {{name}} " message = 
"${var.status_message}" - query = "avg(last_5m):avg:azure.devices_iothubs.status{*} by {name,resource_group} < 1" + query = "avg(last_5m):avg:azure.devices_iothubs.status{subscription_id:${var.subscription_id}} by {name,resource_group} < 1" type = "query alert" notify_no_data = true evaluation_delay = "${var.delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true @@ -93,12 +93,12 @@ resource "datadog_monitor" "total_devices" { name = "[${var.environment}] Total devices is wrong on {{name}} " message = "${var.total_devices_message}" - query = "avg(last_5m):avg:azure.devices_iothubs.devices.total_devices{*} by {name,resource_group} == 0" + query = "avg(last_5m):avg:azure.devices_iothubs.devices.total_devices{subscription_id:${var.subscription_id}} by {name,resource_group} == 0" type = "query alert" notify_no_data = true evaluation_delay = "${var.delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true @@ -112,7 +112,7 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" { name = "[${var.environment}] Too many c2d methods failure on {{name}} " message = "${var.c2d_methods_failed_message}" - query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.methods.failure{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.methods.failure{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.methods.success{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_methods_failed_threshold_critical}" + query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.methods.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.methods.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.methods.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > 
${var.c2d_methods_failed_threshold_critical}" type = "query alert" thresholds { @@ -122,7 +122,7 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" { notify_no_data = false evaluation_delay = "${var.delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true @@ -136,7 +136,7 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" { name = "[${var.environment}] Too many c2d twin read failure on {{name}} " message = "${var.c2d_twin_read_failed_message}" - query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.twin.read.failure{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.twin.read.failure{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.twin.read.success{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_twin_read_failed_threshold_critical}" + query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.twin.read.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_twin_read_failed_threshold_critical}" type = "query alert" thresholds { @@ -146,7 +146,7 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" { notify_no_data = false evaluation_delay = "${var.delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true @@ -160,7 +160,7 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" { name = "[${var.environment}] Too many c2d twin update failure on {{name}} " message = "${var.c2d_twin_update_failed_message}" - query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.twin.update.failure{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.twin.update.failure{*} by 
{name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.twin.update.success{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_twin_update_failed_threshold_critical}" + query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.twin.update.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_twin_update_failed_threshold_critical}" type = "query alert" thresholds { @@ -170,7 +170,7 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" { notify_no_data = false evaluation_delay = "${var.delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true @@ -184,7 +184,7 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" { name = "[${var.environment}] Too many d2c twin read failure on {{name}} " message = "${var.d2c_twin_read_failed_message}" - query = "avg(last_5m):( avg:azure.devices_iothubs.d2c.twin.read.failure{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.d2c.twin.read.failure{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.d2c.twin.read.success{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.d2c_twin_read_failed_threshold_critical}" + query = "avg(last_5m):( avg:azure.devices_iothubs.d2c.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.d2c.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.d2c.twin.read.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.d2c_twin_read_failed_threshold_critical}" type = "query alert" thresholds { @@ -194,7 
+194,7 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" { notify_no_data = false evaluation_delay = "${var.delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true @@ -208,7 +208,7 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" { name = "[${var.environment}] Too many d2c twin update failure on {{name}} " message = "${var.d2c_twin_update_failed_message}" - query = "avg(last_5m):( avg:azure.devices_iothubs.d2c.twin.update.failure{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.d2c.twin.update.failure{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.d2c.twin.update.success{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.d2c_twin_update_failed_threshold_critical}" + query = "avg(last_5m):( avg:azure.devices_iothubs.d2c.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.d2c.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.d2c.twin.update.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.d2c_twin_update_failed_threshold_critical}" type = "query alert" thresholds { @@ -218,7 +218,7 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" { notify_no_data = false evaluation_delay = "${var.delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true From 193352c212277fac91d51ad336b704f7cde8d54c Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Mon, 30 Oct 2017 18:09:03 +0100 Subject: [PATCH 06/14] MON-80 Add IOT Hub in Names --- cloud/azure/iothubs/monitors-iothubs.tf | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index a4ec018..f111897 100644 --- 
a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -1,5 +1,5 @@ resource "datadog_monitor" "too_many_jobs_failed" { - name = "[${var.environment}] Too many jobs failed on {{name}} " + name = "[${var.environment}] IOT Hub Too many jobs failed on {{name}}" message = "${var.jobs_failed_message}" query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.failed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.jobs.failed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.jobs.completed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.jobs_failed_threshold_critical}" @@ -23,7 +23,7 @@ resource "datadog_monitor" "too_many_jobs_failed" { } resource "datadog_monitor" "too_many_list_jobs_failed" { - name = "[${var.environment}] Too many list_jobs failure on {{name}} " + name = "[${var.environment}] IOT Hub Too many list_jobs failure on {{name}}" message = "${var.listjobs_failed_message}" query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.list_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.list_jobs.success{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.list_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() ) ) * 100 > ${var.listjobs_failed_threshold_critical}" @@ -47,7 +47,7 @@ resource "datadog_monitor" "too_many_list_jobs_failed" { } resource "datadog_monitor" "too_many_query_jobs_failed" { - name = "[${var.environment}] Too many query_jobs failed on {{name}} " + name = "[${var.environment}] IOT Hub Too many query_jobs failed on {{name}}" message = "${var.queryjobs_failed_message}" query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.query_jobs.failure{subscription_id:${var.subscription_id}} by 
{resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.query_jobs.success{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.query_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() ) ) * 100 > ${var.queryjobs_failed_threshold_critical}" @@ -55,7 +55,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { thresholds { warning = "${var.queryjobs_failed_threshold_warning}" - critical = "${var.queryjobs_failed_threshold_warning}" + critical = "${var.queryjobs_failed_threshold_critical}" } notify_no_data = false @@ -71,7 +71,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { } resource "datadog_monitor" "status" { - name = "[${var.environment}] Status is not ok on {{name}} " + name = "[${var.environment}] IOT Hub Status is not ok on {{name}}" message = "${var.status_message}" query = "avg(last_5m):avg:azure.devices_iothubs.status{subscription_id:${var.subscription_id}} by {name,resource_group} < 1" @@ -90,7 +90,7 @@ resource "datadog_monitor" "status" { } resource "datadog_monitor" "total_devices" { - name = "[${var.environment}] Total devices is wrong on {{name}} " + name = "[${var.environment}] IOT Hub Total devices is wrong on {{name}}" message = "${var.total_devices_message}" query = "avg(last_5m):avg:azure.devices_iothubs.devices.total_devices{subscription_id:${var.subscription_id}} by {name,resource_group} == 0" @@ -109,7 +109,7 @@ resource "datadog_monitor" "total_devices" { } resource "datadog_monitor" "too_many_c2d_methods_failed" { - name = "[${var.environment}] Too many c2d methods failure on {{name}} " + name = "[${var.environment}] IOT Hub Too many c2d methods failure on {{name}}" message = "${var.c2d_methods_failed_message}" query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.methods.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( 
avg:azure.devices_iothubs.c2d.methods.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.methods.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_methods_failed_threshold_critical}" @@ -133,7 +133,7 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" { } resource "datadog_monitor" "too_many_c2d_twin_read_failed" { - name = "[${var.environment}] Too many c2d twin read failure on {{name}} " + name = "[${var.environment}] IOT Hub Too many c2d twin read failure on {{name}}" message = "${var.c2d_twin_read_failed_message}" query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.twin.read.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_twin_read_failed_threshold_critical}" @@ -157,7 +157,7 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" { } resource "datadog_monitor" "too_many_c2d_twin_update_failed" { - name = "[${var.environment}] Too many c2d twin update failure on {{name}} " + name = "[${var.environment}] IOT Hub Too many c2d twin update failure on {{name}}" message = "${var.c2d_twin_update_failed_message}" query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.twin.update.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_twin_update_failed_threshold_critical}" @@ -181,7 +181,7 @@ resource "datadog_monitor" 
"too_many_c2d_twin_update_failed" { } resource "datadog_monitor" "too_many_d2c_twin_read_failed" { - name = "[${var.environment}] Too many d2c twin read failure on {{name}} " + name = "[${var.environment}] IOT Hub Too many d2c twin read failure on {{name}}" message = "${var.d2c_twin_read_failed_message}" query = "avg(last_5m):( avg:azure.devices_iothubs.d2c.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.d2c.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.d2c.twin.read.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.d2c_twin_read_failed_threshold_critical}" @@ -205,7 +205,7 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" { } resource "datadog_monitor" "too_many_d2c_twin_update_failed" { - name = "[${var.environment}] Too many d2c twin update failure on {{name}} " + name = "[${var.environment}] IOT Hub Too many d2c twin update failure on {{name}}" message = "${var.d2c_twin_update_failed_message}" query = "avg(last_5m):( avg:azure.devices_iothubs.d2c.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.d2c.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.d2c.twin.update.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.d2c_twin_update_failed_threshold_critical}" From 113d4aabd25fa1172dea821ef6b6f7688011f960 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Tue, 31 Oct 2017 11:12:26 +0100 Subject: [PATCH 07/14] MON-80 Add monitors for telemetry --- cloud/azure/iothubs/inputs.tf | 44 ++++++++- cloud/azure/iothubs/monitors-iothubs.tf | 121 +++++++++++++++++++++++- 2 files changed, 161 insertions(+), 4 deletions(-) diff --git 
a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index e705d8f..5ae0587 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -93,4 +93,46 @@ variable "d2c_twin_update_failed_threshold_critical" { default = 10 } -variable "d2c_twin_update_failed_message" {} \ No newline at end of file +variable "d2c_twin_update_failed_message" {} + +variable "d2c_telemetry_egress_dropped_threshold_warning" { + default = 500 +} + +variable "d2c_telemetry_egress_dropped_threshold_critical" { + default = 1000 +} + +variable "d2c_telemetry_egress_dropped_message" {} + +variable "d2c_telemetry_egress_orphaned_threshold_warning" { + default = 500 +} + +variable "d2c_telemetry_egress_orphaned_threshold_critical" { + default = 1000 +} + +variable "d2c_telemetry_egress_orphaned_message" {} + +variable "d2c_telemetry_egress_invalid_threshold_warning" { + default = 500 +} + +variable "d2c_telemetry_egress_invalid_threshold_critical" { + default = 1000 +} + +variable "d2c_telemetry_egress_invalid_message" {} + +variable "d2c_telemetry_egress_fallback_threshold_warning" { + default = 500 +} + +variable "d2c_telemetry_egress_fallback_threshold_critical" { + default = 1000 +} + +variable "d2c_telemetry_egress_fallback_message" {} + +variable "d2c_telemetry_ingress_nosent_message" {} diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index f111897..4c59099 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -2,7 +2,7 @@ resource "datadog_monitor" "too_many_jobs_failed" { name = "[${var.environment}] IOT Hub Too many jobs failed on {{name}}" message = "${var.jobs_failed_message}" - query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.failed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.jobs.failed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + 
avg:azure.devices_iothubs.jobs.completed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.jobs_failed_threshold_critical}" + query = "avg(last_5m):( avg:azure.devices_iothubs.jobs.failed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.jobs.failed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.jobs.completed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.jobs_failed_threshold_critical}" type = "query alert" thresholds { @@ -26,7 +26,7 @@ resource "datadog_monitor" "too_many_list_jobs_failed" { name = "[${var.environment}] IOT Hub Too many list_jobs failure on {{name}}" message = "${var.listjobs_failed_message}" - query = "sum(last_5m):( avg:azure.devices_iothubs.jobs.list_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.list_jobs.success{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.list_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() ) ) * 100 > ${var.listjobs_failed_threshold_critical}" + query = "avg(last_5m):( avg:azure.devices_iothubs.jobs.list_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.list_jobs.success{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.list_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() ) ) * 100 > ${var.listjobs_failed_threshold_critical}" type = "query alert" thresholds { @@ -50,7 +50,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { name = "[${var.environment}] IOT Hub Too many query_jobs failed on {{name}}" message = "${var.queryjobs_failed_message}" - query = 
"sum(last_5m):( avg:azure.devices_iothubs.jobs.query_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.query_jobs.success{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.query_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() ) ) * 100 > ${var.queryjobs_failed_threshold_critical}" + query = "avg(last_5m):( avg:azure.devices_iothubs.jobs.query_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.query_jobs.success{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.query_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() ) ) * 100 > ${var.queryjobs_failed_threshold_critical}" type = "query alert" thresholds { @@ -227,3 +227,118 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 } + +resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { + name = "[${var.environment}] IOT Hub Too many d2c telemetry egress dropped on {{name}}" + message = "${var.d2c_telemetry_egress_dropped_message}" + + query = "sum(last_5m):avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{subscription_id:${var.subscription_id}} by {name,resource_group} > ${var.d2c_telemetry_egress_dropped_threshold_critical}" + type = "query alert" + + thresholds { + warning = "${var.d2c_telemetry_egress_dropped_threshold_warning}" + critical = "${var.d2c_telemetry_egress_dropped_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" 
"too_many_d2c_telemetry_egress_orphaned" { + name = "[${var.environment}] IOT Hub Too many d2c telemetry egress orphaned on {{name}}" + message = "${var.d2c_telemetry_egress_orphaned_message}" + + query = "sum(last_5m):avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{subscription_id:${var.subscription_id}} by {name,resource_group} > ${var.d2c_telemetry_egress_orphaned_threshold_critical}" + type = "query alert" + + thresholds { + warning = "${var.d2c_telemetry_egress_orphaned_threshold_warning}" + critical = "${var.d2c_telemetry_egress_orphaned_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { + name = "[${var.environment}] IOT Hub Too many d2c telemetry egress invalid on {{name}}" + message = "${var.d2c_telemetry_egress_invalid_message}" + + query = "sum(last_5m):avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{subscription_id:${var.subscription_id}} by {name,resource_group} > ${var.d2c_telemetry_egress_invalid_threshold_critical}" + type = "query alert" + + thresholds { + warning = "${var.d2c_telemetry_egress_invalid_threshold_warning}" + critical = "${var.d2c_telemetry_egress_invalid_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" { + name = "[${var.environment}] IOT Hub Too many d2c telemetry egress fallback on {{name}}" + message = "${var.d2c_telemetry_egress_fallback_message}" + + query = 
"sum(last_5m):avg:azure.devices_iothubs.d2c.telemetry.egress.fallback{subscription_id:${var.subscription_id}} by {name,resource_group} > ${var.d2c_telemetry_egress_fallback_threshold_critical}" + type = "query alert" + + thresholds { + warning = "${var.d2c_telemetry_egress_fallback_threshold_warning}" + critical = "${var.d2c_telemetry_egress_fallback_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { + name = "[${var.environment}] IOT Hub Too many d2c telemetry ingress no sent on {{name}}" + message = "${var.d2c_telemetry_ingress_nosent_message}" + + query = "sum(last_5m):avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() - avg:azure.devices_iothubs.d2c.telemetry.ingress.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() > 0" + type = "query alert" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} From cf3309ce753a146901fce7d1bcb4871639f8d410 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Tue, 31 Oct 2017 11:47:37 +0100 Subject: [PATCH 08/14] MON-80 Add README.md --- cloud/azure/iothubs/README.md | 109 ++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 cloud/azure/iothubs/README.md diff --git a/cloud/azure/iothubs/README.md b/cloud/azure/iothubs/README.md new file mode 100644 index 0000000..d53bf2b --- /dev/null +++ b/cloud/azure/iothubs/README.md @@ -0,0 +1,109 @@ +Azure Redis DataDog monitors 
+============================ + +How to use this module +---------------------- + +``` +module "iothubs" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/iothubs?ref=MON-80-azure-hub-iot-monitors" + + jobs_failed_message = "${module.datadog-message-alerting.alerting-message}" + listjobs_failed_message = "${module.datadog-message-alerting.alerting-message}" + queryjobs_failed_message = "${module.datadog-message-alerting.alerting-message}" + status_message = "${module.datadog-message-alerting.alerting-message}" + total_devices_message = "${module.datadog-message-alerting.alerting-message}" + c2d_methods_failed_message = "${module.datadog-message-alerting.alerting-message}" + c2d_twin_read_failed_message = "${module.datadog-message-alerting.alerting-message}" + c2d_twin_update_failed_message = "${module.datadog-message-alerting.alerting-message}" + d2c_twin_read_failed_message = "${module.datadog-message-alerting.alerting-message}" + d2c_twin_update_failed_message = "${module.datadog-message-alerting.alerting-message}" + d2c_telemetry_egress_dropped_message = "${module.datadog-message-alerting.alerting-message}" + d2c_telemetry_egress_orphaned_message = "${module.datadog-message-alerting.alerting-message}" + d2c_telemetry_egress_invalid_message = "${module.datadog-message-alerting.alerting-message}" + d2c_telemetry_egress_fallback_message = "${module.datadog-message-alerting.alerting-message}" + d2c_telemetry_ingress_nosent_message = "${module.datadog-message-alerting.alerting-message}" + + environment = "${var.environment}" + stack = "${var.stack}" + client_name = "${var.client_name}" + subscription_id = "${var.subscription_id}" +} +``` + +Purpose +------- +Creates a DataDog monitors with the following checks : + +* Service status check +* Jobs failed average check +* Query Jobs failed average check +* List Jobs failed average check +* Total devices count check +* C2D methods failed average check +* C2D twin read failed 
average check +* C2D twin update failed average check +* D2C twin read failed average check +* D2C twin update failed average check +* D2C telemetry egress dropped count check +* D2C telemetry egress orphaned count check +* D2C telemetry egress invalid count check +* D2C telemetry egress fallback count check +* D2C telemetry ingress no sent count check + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| c2d_methods_failed_message | | string | - | yes | +| c2d_methods_failed_threshold_critical | | string | `10` | no | +| c2d_methods_failed_threshold_warning | | string | `0` | no | +| c2d_twin_read_failed_message | | string | - | yes | +| c2d_twin_read_failed_threshold_critical | | string | `10` | no | +| c2d_twin_read_failed_threshold_warning | | string | `0` | no | +| c2d_twin_update_failed_message | | string | - | yes | +| c2d_twin_update_failed_threshold_critical | | string | `10` | no | +| c2d_twin_update_failed_threshold_warning | | string | `0` | no | +| client_name | | string | - | yes | +| d2c_telemetry_egress_dropped_message | | string | - | yes | +| d2c_telemetry_egress_dropped_threshold_critical | | string | `1000` | no | +| d2c_telemetry_egress_dropped_threshold_warning | | string | `500` | no | +| d2c_telemetry_egress_fallback_message | | string | - | yes | +| d2c_telemetry_egress_fallback_threshold_critical | | string | `1000` | no | +| d2c_telemetry_egress_fallback_threshold_warning | | string | `500` | no | +| d2c_telemetry_egress_invalid_message | | string | - | yes | +| d2c_telemetry_egress_invalid_threshold_critical | | string | `1000` | no | +| d2c_telemetry_egress_invalid_threshold_warning | | string | `500` | no | +| d2c_telemetry_egress_orphaned_message | | string | - | yes | +| d2c_telemetry_egress_orphaned_threshold_critical | | string | `1000` | no | +| d2c_telemetry_egress_orphaned_threshold_warning | | string | `500` | no | +| d2c_telemetry_ingress_nosent_message | | 
string | - | yes | +| d2c_twin_read_failed_message | | string | - | yes | +| d2c_twin_read_failed_threshold_critical | | string | `10` | no | +| d2c_twin_read_failed_threshold_warning | | string | `0` | no | +| d2c_twin_update_failed_message | | string | - | yes | +| d2c_twin_update_failed_threshold_critical | | string | `10` | no | +| d2c_twin_update_failed_threshold_warning | | string | `0` | no | +| delay | | string | `600` | no | +| environment | | string | - | yes | +| jobs_failed_message | | string | - | yes | +| jobs_failed_threshold_critical | | string | `10` | no | +| jobs_failed_threshold_warning | # IOT hubs | string | `0` | no | +| listjobs_failed_message | | string | - | yes | +| listjobs_failed_threshold_critical | | string | `10` | no | +| listjobs_failed_threshold_warning | | string | `0` | no | +| queryjobs_failed_message | | string | - | yes | +| queryjobs_failed_threshold_critical | | string | `10` | no | +| queryjobs_failed_threshold_warning | | string | `0` | no | +| stack | | string | - | yes | +| status_message | | string | - | yes | +| subscription_id | | string | - | yes | +| total_devices_message | | string | - | yes | + +Related documentation +--------------------- + +DataDog documentation: https://docs.datadoghq.com/integrations/azure_iot_hub/ + +Azure IOT Hubs metrics documentation: https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health \ No newline at end of file From c1563c331898b4ca8b2b08792e27d35e94affed2 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Tue, 31 Oct 2017 14:25:24 +0100 Subject: [PATCH 09/14] MON-80 use only one message and add inputs descriptions --- cloud/azure/iothubs/README.md | 97 ++++++----------- cloud/azure/iothubs/inputs.tf | 76 +++++++------- cloud/azure/iothubs/monitors-iothubs.tf | 133 ++++++++++++++++++------ 3 files changed, 178 insertions(+), 128 deletions(-) diff --git a/cloud/azure/iothubs/README.md b/cloud/azure/iothubs/README.md index d53bf2b..3d6bb91 100644 --- 
a/cloud/azure/iothubs/README.md +++ b/cloud/azure/iothubs/README.md @@ -1,4 +1,4 @@ -Azure Redis DataDog monitors +Azure IOT Hubs DataDog monitors ============================ How to use this module @@ -8,22 +8,8 @@ How to use this module module "iothubs" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/iothubs?ref=MON-80-azure-hub-iot-monitors" - jobs_failed_message = "${module.datadog-message-alerting.alerting-message}" - listjobs_failed_message = "${module.datadog-message-alerting.alerting-message}" - queryjobs_failed_message = "${module.datadog-message-alerting.alerting-message}" - status_message = "${module.datadog-message-alerting.alerting-message}" - total_devices_message = "${module.datadog-message-alerting.alerting-message}" - c2d_methods_failed_message = "${module.datadog-message-alerting.alerting-message}" - c2d_twin_read_failed_message = "${module.datadog-message-alerting.alerting-message}" - c2d_twin_update_failed_message = "${module.datadog-message-alerting.alerting-message}" - d2c_twin_read_failed_message = "${module.datadog-message-alerting.alerting-message}" - d2c_twin_update_failed_message = "${module.datadog-message-alerting.alerting-message}" - d2c_telemetry_egress_dropped_message = "${module.datadog-message-alerting.alerting-message}" - d2c_telemetry_egress_orphaned_message = "${module.datadog-message-alerting.alerting-message}" - d2c_telemetry_egress_invalid_message = "${module.datadog-message-alerting.alerting-message}" - d2c_telemetry_egress_fallback_message = "${module.datadog-message-alerting.alerting-message}" - d2c_telemetry_ingress_nosent_message = "${module.datadog-message-alerting.alerting-message}" - + message = "${module.datadog-message-alerting.alerting-message}" + environment = "${var.environment}" stack = "${var.stack}" client_name = "${var.client_name}" @@ -56,54 +42,39 @@ Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| 
c2d_methods_failed_message | | string | - | yes | -| c2d_methods_failed_threshold_critical | | string | `10` | no | -| c2d_methods_failed_threshold_warning | | string | `0` | no | -| c2d_twin_read_failed_message | | string | - | yes | -| c2d_twin_read_failed_threshold_critical | | string | `10` | no | -| c2d_twin_read_failed_threshold_warning | | string | `0` | no | -| c2d_twin_update_failed_message | | string | - | yes | -| c2d_twin_update_failed_threshold_critical | | string | `10` | no | -| c2d_twin_update_failed_threshold_warning | | string | `0` | no | -| client_name | | string | - | yes | -| d2c_telemetry_egress_dropped_message | | string | - | yes | -| d2c_telemetry_egress_dropped_threshold_critical | | string | `1000` | no | -| d2c_telemetry_egress_dropped_threshold_warning | | string | `500` | no | -| d2c_telemetry_egress_fallback_message | | string | - | yes | -| d2c_telemetry_egress_fallback_threshold_critical | | string | `1000` | no | -| d2c_telemetry_egress_fallback_threshold_warning | | string | `500` | no | -| d2c_telemetry_egress_invalid_message | | string | - | yes | -| d2c_telemetry_egress_invalid_threshold_critical | | string | `1000` | no | -| d2c_telemetry_egress_invalid_threshold_warning | | string | `500` | no | -| d2c_telemetry_egress_orphaned_message | | string | - | yes | -| d2c_telemetry_egress_orphaned_threshold_critical | | string | `1000` | no | -| d2c_telemetry_egress_orphaned_threshold_warning | | string | `500` | no | -| d2c_telemetry_ingress_nosent_message | | string | - | yes | -| d2c_twin_read_failed_message | | string | - | yes | -| d2c_twin_read_failed_threshold_critical | | string | `10` | no | -| d2c_twin_read_failed_threshold_warning | | string | `0` | no | -| d2c_twin_update_failed_message | | string | - | yes | -| d2c_twin_update_failed_threshold_critical | | string | `10` | no | -| d2c_twin_update_failed_threshold_warning | | string | `0` | no | -| delay | | string | `600` | no | -| environment | | string | - | yes | -| 
jobs_failed_message | | string | - | yes | -| jobs_failed_threshold_critical | | string | `10` | no | -| jobs_failed_threshold_warning | # IOT hubs | string | `0` | no | -| listjobs_failed_message | | string | - | yes | -| listjobs_failed_threshold_critical | | string | `10` | no | -| listjobs_failed_threshold_warning | | string | `0` | no | -| queryjobs_failed_message | | string | - | yes | -| queryjobs_failed_threshold_critical | | string | `10` | no | -| queryjobs_failed_threshold_warning | | string | `0` | no | -| stack | | string | - | yes | -| status_message | | string | - | yes | -| subscription_id | | string | - | yes | -| total_devices_message | | string | - | yes | +| c2d_methods_failed_threshold_critical | C2D Methods Failed rate limit (critical threshold) | string | `10` | no | +| c2d_methods_failed_threshold_warning | C2D Methods Failed rate limit (warning threshold) | string | `0` | no | +| c2d_twin_read_failed_threshold_critical | C2D Twin Read Failed rate limit (critical threshold) | string | `10` | no | +| c2d_twin_read_failed_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `0` | no | +| c2d_twin_update_failed_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `10` | no | +| c2d_twin_update_failed_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `0` | no | +| client_name | Client Name | string | - | yes | +| d2c_telemetry_egress_dropped_threshold_critical | D2C Telemetry Dropped Failed limit (critical threshold) | string | `1000` | no | +| d2c_telemetry_egress_dropped_threshold_warning | D2C Telemetry Dropped Failed limit (warning threshold) | string | `500` | no | +| d2c_telemetry_egress_fallback_threshold_critical | D2C Telemetry Fallback Failed limit (critical threshold) | string | `1000` | no | +| d2c_telemetry_egress_fallback_threshold_warning | D2C Telemetry Fallback Failed limit (warning threshold) | string | `500` | no | +| 
d2c_telemetry_egress_invalid_threshold_critical | D2C Telemetry Invalid Failed limit (critical threshold) | string | `1000` | no | +| d2c_telemetry_egress_invalid_threshold_warning | D2C Telemetry Invalid Failed limit (warning threshold) | string | `500` | no | +| d2c_telemetry_egress_orphaned_threshold_critical | D2C Telemetry Orphaned Failed limit (critical threshold) | string | `1000` | no | +| d2c_telemetry_egress_orphaned_threshold_warning | D2C Telemetry Orphaned Failed limit (warning threshold) | string | `500` | no | +| d2c_twin_read_failed_threshold_critical | D2C Twin Read Failed rate limit (critical threshold) | string | `10` | no | +| d2c_twin_read_failed_threshold_warning | D2C Twin Read Failed rate limit (warning threshold) | string | `0` | no | +| d2c_twin_update_failed_threshold_critical | D2C Twin Update Failed rate limit (critical threshold) | string | `10` | no | +| d2c_twin_update_failed_threshold_warning | D2C Twin Update Failed rate limit (warning threshold) | string | `0` | no | +| delay | Delay in seconds for the metric evaluation | string | `600` | no | +| environment | Architecture Environment | string | - | yes | +| jobs_failed_threshold_critical | Jobs Failed rate limit (critical threshold) | string | `10` | no | +| jobs_failed_threshold_warning | Jobs Failed rate limit (warning threshold) | string | `0` | no | +| listjobs_failed_threshold_critical | ListJobs Failed rate limit (critical threshold) | string | `10` | no | +| listjobs_failed_threshold_warning | ListJobs Failed rate limit (warning threshold) | string | `0` | no | +| message | Message sent when an alert is triggered | string | - | yes | +| queryjobs_failed_threshold_critical | QueryJobs Failed rate limit (critical threshold) | string | `10` | no | +| queryjobs_failed_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `0` | no | +| subscription_id | Subscription ID used to tag monitors | string | - | yes | Related documentation --------------------- 
-DataDog documentation: https://docs.datadoghq.com/integrations/azure_iot_hub/ +DataDog documentation: [https://docs.datadoghq.com/integrations/azure_iot_hub](https://docs.datadoghq.com/integrations/azure_iot_hub) -Azure IOT Hubs metrics documentation: https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health \ No newline at end of file +Azure IOT Hubs metrics documentation: [https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health](https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health) \ No newline at end of file diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index 5ae0587..cc591cd 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -1,138 +1,144 @@ -variable "environment" {} +variable "environment" { + description = "Architecture Environment" + type = "string" +} -variable "stack" {} +variable "client_name" { + description = "Client Name" + type = "string" +} -variable "client_name" {} - -variable "subscription_id" {} +variable "subscription_id" { + description = "Subscription ID used to tag monitors" + type = "string" +} variable "delay" { + description = "Delay in seconds for the metric evaluation" default = 600 } +variable "message" { + description = "Message sent when an alert is triggered" +} + ## IOT hubs variable "jobs_failed_threshold_warning" { + description = "Jobs Failed rate limit (warning threshold)" default = 0 } variable "jobs_failed_threshold_critical" { + description = "Jobs Failed rate limit (critical threshold)" default = 10 } -variable "jobs_failed_message" {} - variable "listjobs_failed_threshold_warning" { + description = "ListJobs Failed rate limit (warning threshold)" default = 0 } variable "listjobs_failed_threshold_critical" { + description = "ListJobs Failed rate limit (critical threshold)" default = 10 } -variable "listjobs_failed_message" {} - variable "queryjobs_failed_threshold_warning" { + description = 
"QueryJobs Failed rate limit (warning threshold)" default = 0 } variable "queryjobs_failed_threshold_critical" { + description = "QueryJobs Failed rate limit (critical threshold)" default = 10 } -variable "queryjobs_failed_message" {} - -variable "status_message" {} - -variable "total_devices_message" {} - variable "c2d_methods_failed_threshold_warning" { + description = "C2D Methods Failed rate limit (warning threshold)" default = 0 } variable "c2d_methods_failed_threshold_critical" { + description = "C2D Methods Failed rate limit (critical threshold)" default = 10 } -variable "c2d_methods_failed_message" {} - variable "c2d_twin_read_failed_threshold_warning" { + description = "C2D Twin Read Failed rate limit (warning threshold)" default = 0 } variable "c2d_twin_read_failed_threshold_critical" { + description = "C2D Twin Read Failed rate limit (critical threshold)" default = 10 } -variable "c2d_twin_read_failed_message" {} - variable "c2d_twin_update_failed_threshold_warning" { + description = "C2D Twin Update Failed rate limit (warning threshold)" default = 0 } variable "c2d_twin_update_failed_threshold_critical" { + description = "C2D Twin Update Failed rate limit (critical threshold)" default = 10 } -variable "c2d_twin_update_failed_message" {} - variable "d2c_twin_read_failed_threshold_warning" { + description = "D2C Twin Read Failed rate limit (warning threshold)" default = 0 } variable "d2c_twin_read_failed_threshold_critical" { + description = "D2C Twin Read Failed rate limit (critical threshold)" default = 10 } -variable "d2c_twin_read_failed_message" {} - variable "d2c_twin_update_failed_threshold_warning" { + description = "D2C Twin Update Failed rate limit (warning threshold)" default = 0 } variable "d2c_twin_update_failed_threshold_critical" { + description = "D2C Twin Update Failed rate limit (critical threshold)" default = 10 } -variable "d2c_twin_update_failed_message" {} - variable "d2c_telemetry_egress_dropped_threshold_warning" { + description = 
"D2C Telemetry Dropped Failed limit (warning threshold)" default = 500 } variable "d2c_telemetry_egress_dropped_threshold_critical" { + description = "D2C Telemetry Dropped Failed limit (critical threshold)" default = 1000 } -variable "d2c_telemetry_egress_dropped_message" {} - variable "d2c_telemetry_egress_orphaned_threshold_warning" { + description = "D2C Telemetry Orphaned Failed limit (warning threshold)" default = 500 } variable "d2c_telemetry_egress_orphaned_threshold_critical" { + description = "D2C Telemetry Orphaned Failed limit (critical threshold)" default = 1000 } -variable "d2c_telemetry_egress_orphaned_message" {} - variable "d2c_telemetry_egress_invalid_threshold_warning" { + description = "D2C Telemetry Invalid Failed limit (warning threshold)" default = 500 } variable "d2c_telemetry_egress_invalid_threshold_critical" { + description = "D2C Telemetry Invalid Failed limit (critical threshold)" default = 1000 } -variable "d2c_telemetry_egress_invalid_message" {} - variable "d2c_telemetry_egress_fallback_threshold_warning" { + description = "D2C Telemetry Fallback Failed limit (warning threshold)" default = 500 } variable "d2c_telemetry_egress_fallback_threshold_critical" { + description = "D2C Telemetry Fallback Failed limit (critical threshold)" default = 1000 } - -variable "d2c_telemetry_egress_fallback_message" {} - -variable "d2c_telemetry_ingress_nosent_message" {} diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index 4c59099..f4a7073 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -1,8 +1,14 @@ resource "datadog_monitor" "too_many_jobs_failed" { name = "[${var.environment}] IOT Hub Too many jobs failed on {{name}}" - message = "${var.jobs_failed_message}" + message = "${var.message}" - query = "avg(last_5m):( avg:azure.devices_iothubs.jobs.failed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( 
avg:azure.devices_iothubs.jobs.failed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.jobs.completed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.jobs_failed_threshold_critical}" + query = < ${var.jobs_failed_threshold_critical} + EOF type = "query alert" thresholds { @@ -24,9 +30,15 @@ resource "datadog_monitor" "too_many_jobs_failed" { resource "datadog_monitor" "too_many_list_jobs_failed" { name = "[${var.environment}] IOT Hub Too many list_jobs failure on {{name}}" - message = "${var.listjobs_failed_message}" + message = "${var.message}" - query = "avg(last_5m):( avg:azure.devices_iothubs.jobs.list_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.list_jobs.success{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.list_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() ) ) * 100 > ${var.listjobs_failed_threshold_critical}" + query = < ${var.listjobs_failed_threshold_critical} + EOF type = "query alert" thresholds { @@ -48,9 +60,15 @@ resource "datadog_monitor" "too_many_list_jobs_failed" { resource "datadog_monitor" "too_many_query_jobs_failed" { name = "[${var.environment}] IOT Hub Too many query_jobs failed on {{name}}" - message = "${var.queryjobs_failed_message}" + message = "${var.message}" - query = "avg(last_5m):( avg:azure.devices_iothubs.jobs.query_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.query_jobs.success{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.query_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() ) ) * 100 > ${var.queryjobs_failed_threshold_critical}" + query = < 
${var.queryjobs_failed_threshold_critical} + EOF type = "query alert" thresholds { @@ -72,9 +90,11 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { resource "datadog_monitor" "status" { name = "[${var.environment}] IOT Hub Status is not ok on {{name}}" - message = "${var.status_message}" + message = "${var.message}" - query = "avg(last_5m):avg:azure.devices_iothubs.status{subscription_id:${var.subscription_id}} by {name,resource_group} < 1" + query = < ${var.c2d_methods_failed_threshold_critical} + EOF type = "query alert" thresholds { @@ -134,9 +162,15 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" { resource "datadog_monitor" "too_many_c2d_twin_read_failed" { name = "[${var.environment}] IOT Hub Too many c2d twin read failure on {{name}}" - message = "${var.c2d_twin_read_failed_message}" + message = "${var.message}" - query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.twin.read.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_twin_read_failed_threshold_critical}" + query = < ${var.c2d_twin_read_failed_threshold_critical} + EOF type = "query alert" thresholds { @@ -158,9 +192,15 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" { resource "datadog_monitor" "too_many_c2d_twin_update_failed" { name = "[${var.environment}] IOT Hub Too many c2d twin update failure on {{name}}" - message = "${var.c2d_twin_update_failed_message}" + message = "${var.message}" - query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.twin.update.failure{subscription_id:${var.subscription_id}} by 
{name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.twin.update.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_twin_update_failed_threshold_critical}" + query = < ${var.c2d_twin_update_failed_threshold_critical} + EOF type = "query alert" thresholds { @@ -182,9 +222,15 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" { resource "datadog_monitor" "too_many_d2c_twin_read_failed" { name = "[${var.environment}] IOT Hub Too many d2c twin read failure on {{name}}" - message = "${var.d2c_twin_read_failed_message}" + message = "${var.message}" - query = "avg(last_5m):( avg:azure.devices_iothubs.d2c.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.d2c.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.d2c.twin.read.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.d2c_twin_read_failed_threshold_critical}" + query = < ${var.d2c_twin_read_failed_threshold_critical} + EOF type = "query alert" thresholds { @@ -206,9 +252,15 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" { resource "datadog_monitor" "too_many_d2c_twin_update_failed" { name = "[${var.environment}] IOT Hub Too many d2c twin update failure on {{name}}" - message = "${var.d2c_twin_update_failed_message}" + message = "${var.message}" - query = "avg(last_5m):( avg:azure.devices_iothubs.d2c.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.d2c.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.d2c.twin.update.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) ) * 100 > ${var.d2c_twin_update_failed_threshold_critical}" + query = < 
${var.d2c_twin_update_failed_threshold_critical} + EOF type = "query alert" thresholds { @@ -230,9 +282,13 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" { resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { name = "[${var.environment}] IOT Hub Too many d2c telemetry egress dropped on {{name}}" - message = "${var.d2c_telemetry_egress_dropped_message}" + message = "${var.message}" - query = "sum(last_5m):avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{subscription_id:${var.subscription_id}} by {name,resource_group} > ${var.d2c_telemetry_egress_dropped_threshold_critical}" + query = < ${var.d2c_telemetry_egress_dropped_threshold_critical} + EOF type = "query alert" thresholds { @@ -254,9 +310,13 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" { name = "[${var.environment}] IOT Hub Too many d2c telemetry egress orphaned on {{name}}" - message = "${var.d2c_telemetry_egress_orphaned_message}" + message = "${var.message}" - query = "sum(last_5m):avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{subscription_id:${var.subscription_id}} by {name,resource_group} > ${var.d2c_telemetry_egress_orphaned_threshold_critical}" + query = < ${var.d2c_telemetry_egress_orphaned_threshold_critical} + EOF type = "query alert" thresholds { @@ -278,9 +338,13 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" { resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { name = "[${var.environment}] IOT Hub Too many d2c telemetry egress invalid on {{name}}" - message = "${var.d2c_telemetry_egress_invalid_message}" + message = "${var.message}" - query = "sum(last_5m):avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{subscription_id:${var.subscription_id}} by {name,resource_group} > ${var.d2c_telemetry_egress_invalid_threshold_critical}" + query = < ${var.d2c_telemetry_egress_invalid_threshold_critical} + EOF type = "query 
alert" thresholds { @@ -302,9 +366,13 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" { name = "[${var.environment}] IOT Hub Too many d2c telemetry egress fallback on {{name}}" - message = "${var.d2c_telemetry_egress_fallback_message}" + message = "${var.message}" - query = "sum(last_5m):avg:azure.devices_iothubs.d2c.telemetry.egress.fallback{subscription_id:${var.subscription_id}} by {name,resource_group} > ${var.d2c_telemetry_egress_fallback_threshold_critical}" + query = < ${var.d2c_telemetry_egress_fallback_threshold_critical} + EOF type = "query alert" thresholds { @@ -326,9 +394,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" { resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { name = "[${var.environment}] IOT Hub Too many d2c telemetry ingress no sent on {{name}}" - message = "${var.d2c_telemetry_ingress_nosent_message}" + message = "${var.message}" - query = "sum(last_5m):avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() - avg:azure.devices_iothubs.d2c.telemetry.ingress.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() > 0" + query = < 0 + EOF type = "query alert" notify_no_data = false From 9186c6915042ad8f969e05e94ce5db7a5a6fc188 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Tue, 31 Oct 2017 15:37:13 +0100 Subject: [PATCH 10/14] MON-80 Now support use_filter_tags --- cloud/azure/iothubs/inputs.tf | 5 +++++ cloud/azure/iothubs/monitors-iothubs.tf | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index cc591cd..d04d03b 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -8,6 +8,11 @@ variable "client_name" { type = "string" } +variable "use_filter_tags" { + description = "Filter the data with 
service tags if true" + default = "true" +} + variable "subscription_id" { description = "Subscription ID used to tag monitors" type = "string" diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index f4a7073..1ee29a3 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -1,3 +1,11 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_appservices:enabled,subscription_id:%s,env:%s", var.subscription_id,var.environment) : var.subscription_id}" + } +} + resource "datadog_monitor" "too_many_jobs_failed" { name = "[${var.environment}] IOT Hub Too many jobs failed on {{name}}" message = "${var.message}" From 4f2d9bd6943231e0f980e02dad53ae85098c795b Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Fri, 3 Nov 2017 20:35:35 +0100 Subject: [PATCH 11/14] MON-80 add tags --- cloud/azure/iothubs/README.md | 6 +- cloud/azure/iothubs/inputs.tf | 34 +++++---- cloud/azure/iothubs/monitors-iothubs.tf | 96 ++++++++++++++++--------- 3 files changed, 85 insertions(+), 51 deletions(-) diff --git a/cloud/azure/iothubs/README.md b/cloud/azure/iothubs/README.md index 3d6bb91..a0e4be5 100644 --- a/cloud/azure/iothubs/README.md +++ b/cloud/azure/iothubs/README.md @@ -9,10 +9,7 @@ module "iothubs" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/iothubs?ref=MON-80-azure-hub-iot-monitors" message = "${module.datadog-message-alerting.alerting-message}" - environment = "${var.environment}" - stack = "${var.stack}" - client_name = "${var.client_name}" subscription_id = "${var.subscription_id}" } ``` @@ -48,7 +45,6 @@ Inputs | c2d_twin_read_failed_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `0` | no | | c2d_twin_update_failed_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `10` | no | 
| c2d_twin_update_failed_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `0` | no | -| client_name | Client Name | string | - | yes | | d2c_telemetry_egress_dropped_threshold_critical | D2C Telemetry Dropped Failed limit (critical threshold) | string | `1000` | no | | d2c_telemetry_egress_dropped_threshold_warning | D2C Telemetry Dropped Failed limit (warning threshold) | string | `500` | no | | d2c_telemetry_egress_fallback_threshold_critical | D2C Telemetry Fallback Failed limit (critical threshold) | string | `1000` | no | @@ -77,4 +73,4 @@ Related documentation DataDog documentation: [https://docs.datadoghq.com/integrations/azure_iot_hub](https://docs.datadoghq.com/integrations/azure_iot_hub) -Azure IOT Hubs metrics documentation: [https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health](https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health) \ No newline at end of file +Azure IOT Hubs metrics documentation: [https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health](https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health) diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index d04d03b..1efabc3 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -1,23 +1,26 @@ +# Global Terraform variable "environment" { description = "Architecture Environment" type = "string" } -variable "client_name" { - description = "Client Name" - type = "string" -} - -variable "use_filter_tags" { - description = "Filter the data with service tags if true" - default = "true" -} - variable "subscription_id" { - description = "Subscription ID used to tag monitors" - type = "string" + description = "Azure account id used as filter for monitors" + type = "string" } +variable "provider" { + description = "Cloud provider which the monitor and its based metric depend on" + type = "string" + default = "azure" +} + 
+variable "service" { + description = "Service monitored by this set of monitors" + type = "string" + default = "storage" + +# Global DataDog variable "delay" { description = "Delay in seconds for the metric evaluation" default = 600 @@ -27,7 +30,12 @@ variable "message" { description = "Message sent when an alert is triggered" } -## IOT hubs +variable "use_filter_tags" { + description = "Filter the data with service tags if true" + default = "true" +} + +# Azure IOT hubs specific variable "jobs_failed_threshold_warning" { description = "Jobs Failed rate limit (warning threshold)" default = 0 diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index 1ee29a3..4398f5f 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -2,7 +2,7 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_appservices:enabled,subscription_id:%s,env:%s", var.subscription_id,var.environment) : var.subscription_id}" + filter = "${var.use_filter_tags == "true" ? 
format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "${data.template_file.filter.rendered}"}" } } @@ -12,9 +12,9 @@ resource "datadog_monitor" "too_many_jobs_failed" { query = < ${var.jobs_failed_threshold_critical} EOF type = "query alert" @@ -34,6 +34,8 @@ resource "datadog_monitor" "too_many_jobs_failed" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "too_many_list_jobs_failed" { @@ -42,9 +44,9 @@ resource "datadog_monitor" "too_many_list_jobs_failed" { query = < ${var.listjobs_failed_threshold_critical} EOF type = "query alert" @@ -64,6 +66,8 @@ resource "datadog_monitor" "too_many_list_jobs_failed" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "too_many_query_jobs_failed" { @@ -72,9 +76,9 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { query = < ${var.queryjobs_failed_threshold_critical} EOF type = "query alert" @@ -94,6 +98,8 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "status" { @@ -101,7 +107,7 @@ resource "datadog_monitor" "status" { message = "${var.message}" query = < ${var.c2d_methods_failed_threshold_critical} EOF type = "query alert" @@ -166,6 +176,8 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "too_many_c2d_twin_read_failed" { @@ -174,9 +186,9 @@ resource "datadog_monitor" 
"too_many_c2d_twin_read_failed" { query = < ${var.c2d_twin_read_failed_threshold_critical} EOF type = "query alert" @@ -196,6 +208,8 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "too_many_c2d_twin_update_failed" { @@ -204,9 +218,9 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" { query = < ${var.c2d_twin_update_failed_threshold_critical} EOF type = "query alert" @@ -226,6 +240,8 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "too_many_d2c_twin_read_failed" { @@ -234,9 +250,9 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" { query = < ${var.d2c_twin_read_failed_threshold_critical} EOF type = "query alert" @@ -256,6 +272,8 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "too_many_d2c_twin_update_failed" { @@ -264,9 +282,9 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" { query = < ${var.d2c_twin_update_failed_threshold_critical} EOF type = "query alert" @@ -286,6 +304,8 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { @@ -294,7 +314,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { query = < 
${var.d2c_telemetry_egress_dropped_threshold_critical} EOF type = "query alert" @@ -314,6 +334,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" { @@ -322,7 +344,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" { query = < ${var.d2c_telemetry_egress_orphaned_threshold_critical} EOF type = "query alert" @@ -342,6 +364,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { @@ -350,7 +374,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { query = < ${var.d2c_telemetry_egress_invalid_threshold_critical} EOF type = "query alert" @@ -370,6 +394,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" { @@ -378,7 +404,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" { query = < ${var.d2c_telemetry_egress_fallback_threshold_critical} EOF type = "query alert" @@ -398,6 +424,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { @@ -406,8 +434,8 @@ resource "datadog_monitor" 
"too_many_d2c_telemetry_ingress_nosent" { query = < 0 EOF type = "query alert" @@ -422,4 +450,6 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } From 2593e5fac4e9da58531e7195af53e5c62802c424 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Fri, 3 Nov 2017 20:47:29 +0100 Subject: [PATCH 12/14] MON-80 update readme --- cloud/azure/iothubs/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cloud/azure/iothubs/README.md b/cloud/azure/iothubs/README.md index a0e4be5..362e226 100644 --- a/cloud/azure/iothubs/README.md +++ b/cloud/azure/iothubs/README.md @@ -64,8 +64,10 @@ Inputs | listjobs_failed_threshold_critical | ListJobs Failed rate limit (critical threshold) | string | `10` | no | | listjobs_failed_threshold_warning | ListJobs Failed rate limit (warning threshold) | string | `0` | no | | message | Message sent when an alert is triggered | string | - | yes | +| provider | What is the monitored provider | string | azure | no | | queryjobs_failed_threshold_critical | QueryJobs Failed rate limit (critical threshold) | string | `10` | no | | queryjobs_failed_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `0` | no | +| service | What is the monitored service | string | storage | no | | subscription_id | Subscription ID used to tag monitors | string | - | yes | Related documentation From e0fa47008ae60aa0ba97eb6b2c33d40b0c2e596a Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Mon, 6 Nov 2017 10:30:00 +0100 Subject: [PATCH 13/14] MON-80 Update variables' names --- cloud/azure/iothubs/README.md | 57 ++++----- cloud/azure/iothubs/inputs.tf | 125 ++++++++++---------- cloud/azure/iothubs/monitors-iothubs.tf | 147 +++++++++++++----------- 3 files changed, 173 insertions(+), 156 deletions(-) diff --git a/cloud/azure/iothubs/README.md 
b/cloud/azure/iothubs/README.md index 362e226..339b357 100644 --- a/cloud/azure/iothubs/README.md +++ b/cloud/azure/iothubs/README.md @@ -1,5 +1,5 @@ Azure IOT Hubs DataDog monitors -============================ +=============================== How to use this module ---------------------- @@ -39,36 +39,37 @@ Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| c2d_methods_failed_threshold_critical | C2D Methods Failed rate limit (critical threshold) | string | `10` | no | -| c2d_methods_failed_threshold_warning | C2D Methods Failed rate limit (warning threshold) | string | `0` | no | -| c2d_twin_read_failed_threshold_critical | C2D Twin Read Failed rate limit (critical threshold) | string | `10` | no | -| c2d_twin_read_failed_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `0` | no | -| c2d_twin_update_failed_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `10` | no | -| c2d_twin_update_failed_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `0` | no | -| d2c_telemetry_egress_dropped_threshold_critical | D2C Telemetry Dropped Failed limit (critical threshold) | string | `1000` | no | -| d2c_telemetry_egress_dropped_threshold_warning | D2C Telemetry Dropped Failed limit (warning threshold) | string | `500` | no | -| d2c_telemetry_egress_fallback_threshold_critical | D2C Telemetry Fallback Failed limit (critical threshold) | string | `1000` | no | -| d2c_telemetry_egress_fallback_threshold_warning | D2C Telemetry Fallback Failed limit (warning threshold) | string | `500` | no | -| d2c_telemetry_egress_invalid_threshold_critical | D2C Telemetry Invalid Failed limit (critical threshold) | string | `1000` | no | -| d2c_telemetry_egress_invalid_threshold_warning | D2C Telemetry Invalid Failed limit (warning threshold) | string | `500` | no | -| d2c_telemetry_egress_orphaned_threshold_critical | D2C Telemetry 
Orphaned Failed limit (critical threshold) | string | `1000` | no | -| d2c_telemetry_egress_orphaned_threshold_warning | D2C Telemetry Orphaned Failed limit (warning threshold) | string | `500` | no | -| d2c_twin_read_failed_threshold_critical | D2C Twin Read Failed rate limit (critical threshold) | string | `10` | no | -| d2c_twin_read_failed_threshold_warning | D2C Twin Read Failed rate limit (warning threshold) | string | `0` | no | -| d2c_twin_update_failed_threshold_critical | D2C Twin Update Failed rate limit (critical threshold) | string | `10` | no | -| d2c_twin_update_failed_threshold_warning | D2C Twin Update Failed rate limit (warning threshold) | string | `0` | no | | delay | Delay in seconds for the metric evaluation | string | `600` | no | +| dropped_d2c_telemetry_egress_threshold_critical | D2C Telemetry Dropped limit (critical threshold) | string | `1000` | no | +| dropped_d2c_telemetry_egress_threshold_warning | D2C Telemetry Dropped limit (warning threshold) | string | `500` | no | | environment | Architecture Environment | string | - | yes | -| jobs_failed_threshold_critical | Jobs Failed rate limit (critical threshold) | string | `10` | no | -| jobs_failed_threshold_warning | Jobs Failed rate limit (warning threshold) | string | `0` | no | -| listjobs_failed_threshold_critical | ListJobs Failed rate limit (critical threshold) | string | `10` | no | -| listjobs_failed_threshold_warning | ListJobs Failed rate limit (warning threshold) | string | `0` | no | +| failed_c2d_methods_rate_threshold_critical | C2D Methods Failed rate limit (critical threshold) | string | `10` | no | +| failed_c2d_methods_rate_threshold_warning | C2D Methods Failed rate limit (warning threshold) | string | `0` | no | +| failed_c2d_twin_read_rate_threshold_critical | C2D Twin Read Failed rate limit (critical threshold) | string | `10` | no | +| failed_c2d_twin_read_rate_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `0` | no | +| 
failed_c2d_twin_update_rate_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `10` | no | +| failed_c2d_twin_update_rate_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `0` | no | +| failed_d2c_twin_read_rate_threshold_critical | D2C Twin Read Failed rate limit (critical threshold) | string | `10` | no | +| failed_d2c_twin_read_rate_threshold_warning | D2C Twin Read Failed rate limit (warning threshold) | string | `0` | no | +| failed_d2c_twin_update_rate_threshold_critical | D2C Twin Update Failed rate limit (critical threshold) | string | `10` | no | +| failed_d2c_twin_update_rate_threshold_warning | D2C Twin Update Failed rate limit (warning threshold) | string | `0` | no | +| failed_jobs_rate_threshold_critical | Jobs Failed rate limit (critical threshold) | string | `10` | no | +| failed_jobs_rate_threshold_warning | Jobs Failed rate limit (warning threshold) | string | `0` | no | +| failed_listjobs_rate_threshold_critical | ListJobs Failed rate limit (critical threshold) | string | `10` | no | +| failed_listjobs_rate_threshold_warning | ListJobs Failed rate limit (warning threshold) | string | `0` | no | +| failed_queryjobs_rate_threshold_critical | QueryJobs Failed rate limit (critical threshold) | string | `10` | no | +| failed_queryjobs_rate_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `0` | no | +| fallback_d2c_telemetry_egress_threshold_critical | D2C Telemetry Fallback limit (critical threshold) | string | `1000` | no | +| fallback_d2c_telemetry_egress_threshold_warning | D2C Telemetry Fallback limit (warning threshold) | string | `500` | no | +| invalid_d2c_telemetry_egress_threshold_critical | D2C Telemetry Invalid limit (critical threshold) | string | `1000` | no | +| invalid_d2c_telemetry_egress_threshold_warning | D2C Telemetry Invalid limit (warning threshold) | string | `500` | no | | message | Message sent when an alert is triggered | 
string | - | yes | -| provider | What is the monitored provider | string | azure | no | -| queryjobs_failed_threshold_critical | QueryJobs Failed rate limit (critical threshold) | string | `10` | no | -| queryjobs_failed_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `0` | no | -| service | What is the monitored service | string | storage | no | -| subscription_id | Subscription ID used to tag monitors | string | - | yes | +| orphaned_d2c_telemetry_egress_threshold_critical | D2C Telemetry Orphaned limit (critical threshold) | string | `1000` | no | +| orphaned_d2c_telemetry_egress_threshold_warning | D2C Telemetry Orphaned limit (warning threshold) | string | `500` | no | +| provider | Cloud provider which the monitor and its based metric depend on | string | `azure` | no | +| service | Service monitored by this set of monitors | string | `storage` | no | +| subscription_id | Azure account id used as filter for monitors | string | - | yes | +| use_filter_tags | Filter the data with service tags if true | string | `true` | no | Related documentation --------------------- diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index 1efabc3..01c77fb 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -6,24 +6,25 @@ variable "environment" { variable "subscription_id" { description = "Azure account id used as filter for monitors" - type = "string" + type = "string" } variable "provider" { description = "Cloud provider which the monitor and its based metric depend on" - type = "string" - default = "azure" + type = "string" + default = "azure" } variable "service" { description = "Service monitored by this set of monitors" - type = "string" - default = "storage" + type = "string" + default = "storage" +} # Global DataDog variable "delay" { description = "Delay in seconds for the metric evaluation" - default = 600 + default = 600 } variable "message" { @@ -36,122 +37,122 @@ variable 
"use_filter_tags" { } # Azure IOT hubs specific -variable "jobs_failed_threshold_warning" { +variable "failed_jobs_rate_threshold_warning" { description = "Jobs Failed rate limit (warning threshold)" - default = 0 + default = 0 } -variable "jobs_failed_threshold_critical" { +variable "failed_jobs_rate_threshold_critical" { description = "Jobs Failed rate limit (critical threshold)" - default = 10 + default = 10 } -variable "listjobs_failed_threshold_warning" { +variable "failed_listjobs_rate_threshold_warning" { description = "ListJobs Failed rate limit (warning threshold)" - default = 0 + default = 0 } -variable "listjobs_failed_threshold_critical" { +variable "failed_listjobs_rate_threshold_critical" { description = "ListJobs Failed rate limit (critical threshold)" - default = 10 + default = 10 } -variable "queryjobs_failed_threshold_warning" { +variable "failed_queryjobs_rate_threshold_warning" { description = "QueryJobs Failed rate limit (warning threshold)" - default = 0 + default = 0 } -variable "queryjobs_failed_threshold_critical" { +variable "failed_queryjobs_rate_threshold_critical" { description = "QueryJobs Failed rate limit (critical threshold)" - default = 10 + default = 10 } -variable "c2d_methods_failed_threshold_warning" { +variable "failed_c2d_methods_rate_threshold_warning" { description = "C2D Methods Failed rate limit (warning threshold)" - default = 0 + default = 0 } -variable "c2d_methods_failed_threshold_critical" { +variable "failed_c2d_methods_rate_threshold_critical" { description = "C2D Methods Failed rate limit (critical threshold)" - default = 10 + default = 10 } -variable "c2d_twin_read_failed_threshold_warning" { +variable "failed_c2d_twin_read_rate_threshold_warning" { description = "C2D Twin Read Failed rate limit (warning threshold)" - default = 0 + default = 0 } -variable "c2d_twin_read_failed_threshold_critical" { +variable "failed_c2d_twin_read_rate_threshold_critical" { description = "C2D Twin Read Failed rate limit (critical 
threshold)" - default = 10 + default = 10 } -variable "c2d_twin_update_failed_threshold_warning" { +variable "failed_c2d_twin_update_rate_threshold_warning" { description = "C2D Twin Update Failed rate limit (warning threshold)" - default = 0 + default = 0 } -variable "c2d_twin_update_failed_threshold_critical" { +variable "failed_c2d_twin_update_rate_threshold_critical" { description = "C2D Twin Update Failed rate limit (critical threshold)" - default = 10 + default = 10 } -variable "d2c_twin_read_failed_threshold_warning" { +variable "failed_d2c_twin_read_rate_threshold_warning" { description = "D2C Twin Read Failed rate limit (warning threshold)" - default = 0 + default = 0 } -variable "d2c_twin_read_failed_threshold_critical" { +variable "failed_d2c_twin_read_rate_threshold_critical" { description = "D2C Twin Read Failed rate limit (critical threshold)" - default = 10 + default = 10 } -variable "d2c_twin_update_failed_threshold_warning" { +variable "failed_d2c_twin_update_rate_threshold_warning" { description = "D2C Twin Update Failed rate limit (warning threshold)" - default = 0 + default = 0 } -variable "d2c_twin_update_failed_threshold_critical" { +variable "failed_d2c_twin_update_rate_threshold_critical" { description = "D2C Twin Update Failed rate limit (critical threshold)" - default = 10 + default = 10 } -variable "d2c_telemetry_egress_dropped_threshold_warning" { - description = "D2C Telemetry Dropped Failed limit (warning threshold)" - default = 500 +variable "dropped_d2c_telemetry_egress_threshold_warning" { + description = "D2C Telemetry Dropped limit (warning threshold)" + default = 500 } -variable "d2c_telemetry_egress_dropped_threshold_critical" { - description = "D2C Telemetry Dropped Failed limit (critical threshold)" - default = 1000 +variable "dropped_d2c_telemetry_egress_threshold_critical" { + description = "D2C Telemetry Dropped limit (critical threshold)" + default = 1000 } -variable "d2c_telemetry_egress_orphaned_threshold_warning" { - 
description = "D2C Telemetry Orphaned Failed limit (warning threshold)" - default = 500 +variable "orphaned_d2c_telemetry_egress_threshold_warning" { + description = "D2C Telemetry Orphaned limit (warning threshold)" + default = 500 } -variable "d2c_telemetry_egress_orphaned_threshold_critical" { - description = "D2C Telemetry Orphaned Failed limit (critical threshold)" - default = 1000 +variable "orphaned_d2c_telemetry_egress_threshold_critical" { + description = "D2C Telemetry Orphaned limit (critical threshold)" + default = 1000 } -variable "d2c_telemetry_egress_invalid_threshold_warning" { - description = "D2C Telemetry Invalid Failed limit (warning threshold)" - default = 500 +variable "invalid_d2c_telemetry_egress_threshold_warning" { + description = "D2C Telemetry Invalid limit (warning threshold)" + default = 500 } -variable "d2c_telemetry_egress_invalid_threshold_critical" { - description = "D2C Telemetry Invalid Failed limit (critical threshold)" - default = 1000 +variable "invalid_d2c_telemetry_egress_threshold_critical" { + description = "D2C Telemetry Invalid limit (critical threshold)" + default = 1000 } -variable "d2c_telemetry_egress_fallback_threshold_warning" { - description = "D2C Telemetry Fallback Failed limit (warning threshold)" - default = 500 +variable "fallback_d2c_telemetry_egress_threshold_warning" { + description = "D2C Telemetry Fallback limit (warning threshold)" + default = 500 } -variable "d2c_telemetry_egress_fallback_threshold_critical" { - description = "D2C Telemetry Fallback Failed limit (critical threshold)" - default = 1000 +variable "fallback_d2c_telemetry_egress_threshold_critical" { + description = "D2C Telemetry Fallback limit (critical threshold)" + default = 1000 } diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index 4398f5f..d7fb7e3 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -15,13 +15,14 @@ resource 
"datadog_monitor" "too_many_jobs_failed" { avg:azure.devices_iothubs.jobs.failed{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.jobs.failed{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.jobs.completed{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() ) - ) * 100 > ${var.jobs_failed_threshold_critical} + ) * 100 > ${var.failed_jobs_rate_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { - warning = "${var.jobs_failed_threshold_warning}" - critical = "${var.jobs_failed_threshold_critical}" + warning = "${var.failed_jobs_rate_threshold_warning}" + critical = "${var.failed_jobs_rate_threshold_critical}" } notify_no_data = false @@ -35,7 +36,7 @@ resource "datadog_monitor" "too_many_jobs_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] } resource "datadog_monitor" "too_many_list_jobs_failed" { @@ -47,13 +48,14 @@ resource "datadog_monitor" "too_many_list_jobs_failed" { avg:azure.devices_iothubs.jobs.list_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.list_jobs.success{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.list_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() ) - ) * 100 > ${var.listjobs_failed_threshold_critical} + ) * 100 > ${var.failed_listjobs_rate_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { - warning = "${var.listjobs_failed_threshold_warning}" - critical = "${var.listjobs_failed_threshold_critical}" + warning = "${var.failed_listjobs_rate_threshold_warning}" + critical = 
"${var.failed_listjobs_rate_threshold_critical}" } notify_no_data = false @@ -67,7 +69,7 @@ resource "datadog_monitor" "too_many_list_jobs_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] } resource "datadog_monitor" "too_many_query_jobs_failed" { @@ -79,13 +81,14 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { avg:azure.devices_iothubs.jobs.query_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.query_jobs.success{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.query_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() ) - ) * 100 > ${var.queryjobs_failed_threshold_critical} + ) * 100 > ${var.failed_queryjobs_rate_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { - warning = "${var.queryjobs_failed_threshold_warning}" - critical = "${var.queryjobs_failed_threshold_critical}" + warning = "${var.failed_queryjobs_rate_threshold_warning}" + critical = "${var.failed_queryjobs_rate_threshold_critical}" } notify_no_data = false @@ -99,7 +102,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] } resource "datadog_monitor" "status" { @@ -109,7 +112,8 @@ resource "datadog_monitor" "status" { query = < ${var.c2d_methods_failed_threshold_critical} + ) * 100 > ${var.failed_c2d_methods_rate_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { - warning = "${var.c2d_methods_failed_threshold_warning}" - critical = 
"${var.c2d_methods_failed_threshold_critical}" + warning = "${var.failed_c2d_methods_rate_threshold_warning}" + critical = "${var.failed_c2d_methods_rate_threshold_critical}" } notify_no_data = false @@ -177,7 +183,7 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] } resource "datadog_monitor" "too_many_c2d_twin_read_failed" { @@ -189,13 +195,14 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" { avg:azure.devices_iothubs.c2d.twin.read.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.twin.read.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.twin.read.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() ) - ) * 100 > ${var.c2d_twin_read_failed_threshold_critical} + ) * 100 > ${var.failed_c2d_twin_read_rate_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { - warning = "${var.c2d_twin_read_failed_threshold_warning}" - critical = "${var.c2d_twin_read_failed_threshold_critical}" + warning = "${var.failed_c2d_twin_read_rate_threshold_warning}" + critical = "${var.failed_c2d_twin_read_rate_threshold_critical}" } notify_no_data = false @@ -209,7 +216,7 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] } resource "datadog_monitor" "too_many_c2d_twin_update_failed" { @@ -221,13 +228,14 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" { 
avg:azure.devices_iothubs.c2d.twin.update.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.twin.update.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.twin.update.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() ) - ) * 100 > ${var.c2d_twin_update_failed_threshold_critical} + ) * 100 > ${var.failed_c2d_twin_update_rate_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { - warning = "${var.c2d_twin_update_failed_threshold_warning}" - critical = "${var.c2d_twin_update_failed_threshold_critical}" + warning = "${var.failed_c2d_twin_update_rate_threshold_warning}" + critical = "${var.failed_c2d_twin_update_rate_threshold_critical}" } notify_no_data = false @@ -241,7 +249,7 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] } resource "datadog_monitor" "too_many_d2c_twin_read_failed" { @@ -253,13 +261,14 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" { avg:azure.devices_iothubs.d2c.twin.read.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.d2c.twin.read.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.d2c.twin.read.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() ) - ) * 100 > ${var.d2c_twin_read_failed_threshold_critical} + ) * 100 > ${var.failed_d2c_twin_read_rate_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { - warning = "${var.d2c_twin_read_failed_threshold_warning}" - critical = 
"${var.d2c_twin_read_failed_threshold_critical}" + warning = "${var.failed_d2c_twin_read_rate_threshold_warning}" + critical = "${var.failed_d2c_twin_read_rate_threshold_critical}" } notify_no_data = false @@ -273,7 +282,7 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] } resource "datadog_monitor" "too_many_d2c_twin_update_failed" { @@ -285,13 +294,14 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" { avg:azure.devices_iothubs.d2c.twin.update.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.d2c.twin.update.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.d2c.twin.update.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() ) - ) * 100 > ${var.d2c_twin_update_failed_threshold_critical} + ) * 100 > ${var.failed_d2c_twin_update_rate_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { - warning = "${var.d2c_twin_update_failed_threshold_warning}" - critical = "${var.d2c_twin_update_failed_threshold_critical}" + warning = "${var.failed_d2c_twin_update_rate_threshold_warning}" + critical = "${var.failed_d2c_twin_update_rate_threshold_critical}" } notify_no_data = false @@ -305,7 +315,7 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] } resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { @@ -315,13 +325,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { 
query = < ${var.d2c_telemetry_egress_dropped_threshold_critical} + ) > ${var.dropped_d2c_telemetry_egress_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { - warning = "${var.d2c_telemetry_egress_dropped_threshold_warning}" - critical = "${var.d2c_telemetry_egress_dropped_threshold_critical}" + warning = "${var.dropped_d2c_telemetry_egress_threshold_warning}" + critical = "${var.dropped_d2c_telemetry_egress_threshold_critical}" } notify_no_data = false @@ -335,7 +346,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] } resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" { @@ -345,13 +356,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" { query = < ${var.d2c_telemetry_egress_orphaned_threshold_critical} + ) > ${var.orphaned_d2c_telemetry_egress_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { - warning = "${var.d2c_telemetry_egress_orphaned_threshold_warning}" - critical = "${var.d2c_telemetry_egress_orphaned_threshold_critical}" + warning = "${var.orphaned_d2c_telemetry_egress_threshold_warning}" + critical = "${var.orphaned_d2c_telemetry_egress_threshold_critical}" } notify_no_data = false @@ -365,7 +377,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] } resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { @@ -375,13 +387,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { query = < 
${var.d2c_telemetry_egress_invalid_threshold_critical} + ) > ${var.invalid_d2c_telemetry_egress_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { - warning = "${var.d2c_telemetry_egress_invalid_threshold_warning}" - critical = "${var.d2c_telemetry_egress_invalid_threshold_critical}" + warning = "${var.invalid_d2c_telemetry_egress_threshold_warning}" + critical = "${var.invalid_d2c_telemetry_egress_threshold_critical}" } notify_no_data = false @@ -395,7 +408,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] } resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" { @@ -405,13 +418,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" { query = < ${var.d2c_telemetry_egress_fallback_threshold_critical} + ) > ${var.fallback_d2c_telemetry_egress_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { - warning = "${var.d2c_telemetry_egress_fallback_threshold_warning}" - critical = "${var.d2c_telemetry_egress_fallback_threshold_critical}" + warning = "${var.fallback_d2c_telemetry_egress_threshold_warning}" + critical = "${var.fallback_d2c_telemetry_egress_threshold_critical}" } notify_no_data = false @@ -425,7 +439,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] } resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { @@ -438,7 +452,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { 
avg:azure.devices_iothubs.d2c.telemetry.ingress.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() ) > 0 EOF - type = "query alert" + + type = "query alert" notify_no_data = false evaluation_delay = "${var.delay}" @@ -451,5 +466,5 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] } From 279778ed888f891f8a30d033b004275757c904ff Mon Sep 17 00:00:00 2001 From: Laurent Piroelle Date: Thu, 23 Nov 2017 15:12:54 +0100 Subject: [PATCH 14/14] MON-80 Normalize monitors --- cloud/azure/iothubs/README.md | 11 ++--- cloud/azure/iothubs/inputs.tf | 26 +++-------- cloud/azure/iothubs/monitors-iothubs.tf | 62 ++++++++++++------------- 3 files changed, 42 insertions(+), 57 deletions(-) diff --git a/cloud/azure/iothubs/README.md b/cloud/azure/iothubs/README.md index 339b357..5187715 100644 --- a/cloud/azure/iothubs/README.md +++ b/cloud/azure/iothubs/README.md @@ -8,9 +8,8 @@ How to use this module module "iothubs" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/iothubs?ref=MON-80-azure-hub-iot-monitors" - message = "${module.datadog-message-alerting.alerting-message}" - environment = "${var.environment}" - subscription_id = "${var.subscription_id}" + message = "${module.datadog-message-alerting.alerting-message}" + environment = "${var.environment}" } ``` @@ -61,15 +60,13 @@ Inputs | failed_queryjobs_rate_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `0` | no | | fallback_d2c_telemetry_egress_threshold_critical | D2C Telemetry Fallback limit (critical threshold) | string | `1000` | no | | fallback_d2c_telemetry_egress_threshold_warning | D2C Telemetry Fallback limit (warning threshold) | string | `500` | no | +| filter_tags_custom | 
Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | invalid_d2c_telemetry_egress_threshold_critical | D2C Telemetry Invalid limit (critical threshold) | string | `1000` | no | | invalid_d2c_telemetry_egress_threshold_warning | D2C Telemetry Invalid limit (warning threshold) | string | `500` | no | | message | Message sent when an alert is triggered | string | - | yes | | orphaned_d2c_telemetry_egress_threshold_critical | D2C Telemetry Orphaned limit (critical threshold) | string | `1000` | no | | orphaned_d2c_telemetry_egress_threshold_warning | D2C Telemetry Orphaned limit (warning threshold) | string | `500` | no | -| provider | Cloud provider which the monitor and its based metric depend on | string | `azure` | no | -| service | Service monitored by this set of monitors | string | `storage` | no | -| subscription_id | Azure account id used as filter for monitors | string | - | yes | -| use_filter_tags | Filter the data with service tags if true | string | `true` | no | Related documentation --------------------- diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index 01c77fb..1b1348f 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -4,23 +4,6 @@ variable "environment" { type = "string" } -variable "subscription_id" { - description = "Azure account id used as filter for monitors" - type = "string" -} - -variable "provider" { - description = "Cloud provider which the monitor and its based metric depend on" - type = "string" - default = "azure" -} - -variable "service" { - description = "Service monitored by this set of monitors" - type = "string" - default = "storage" -} - # Global DataDog variable "delay" { description = "Delay in seconds for the metric evaluation" @@ -31,11 +14,16 @@ variable "message" { description = "Message sent when an alert is triggered" } -variable 
"use_filter_tags" { - description = "Filter the data with service tags if true" +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" default = "true" } +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + # Azure IOT hubs specific variable "failed_jobs_rate_threshold_warning" { description = "Jobs Failed rate limit (warning threshold)" diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index d7fb7e3..6e1f926 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -2,7 +2,7 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "${data.template_file.filter.rendered}"}" + filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_iothub:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } @@ -18,7 +18,7 @@ resource "datadog_monitor" "too_many_jobs_failed" { ) * 100 > ${var.failed_jobs_rate_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { warning = "${var.failed_jobs_rate_threshold_warning}" @@ -36,7 +36,7 @@ resource "datadog_monitor" "too_many_jobs_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] + tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] } resource "datadog_monitor" "too_many_list_jobs_failed" { @@ -51,7 +51,7 @@ resource "datadog_monitor" "too_many_list_jobs_failed" { ) * 100 > ${var.failed_listjobs_rate_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { warning = "${var.failed_listjobs_rate_threshold_warning}" @@ -69,7 +69,7 @@ resource 
"datadog_monitor" "too_many_list_jobs_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] + tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] } resource "datadog_monitor" "too_many_query_jobs_failed" { @@ -84,7 +84,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { ) * 100 > ${var.failed_queryjobs_rate_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { warning = "${var.failed_queryjobs_rate_threshold_warning}" @@ -102,7 +102,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] + tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] } resource "datadog_monitor" "status" { @@ -113,7 +113,7 @@ resource "datadog_monitor" "status" { avg(last_5m):avg:azure.devices_iothubs.status{${data.template_file.filter.rendered}} by {name,resource_group} < 1 EOF - type = "query alert" + type = "metric alert" notify_no_data = true evaluation_delay = "${var.delay}" @@ -126,7 +126,7 @@ resource "datadog_monitor" "status" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] + tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] } resource "datadog_monitor" "total_devices" { @@ -137,7 +137,7 @@ resource "datadog_monitor" "total_devices" { avg(last_5m):avg:azure.devices_iothubs.devices.total_devices{${data.template_file.filter.rendered}} by {name,resource_group} == 0 EOF - type = "query alert" + type = "metric alert" notify_no_data = true evaluation_delay = "${var.delay}" @@ -150,7 +150,7 @@ resource "datadog_monitor" "total_devices" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = 
["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] + tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] } resource "datadog_monitor" "too_many_c2d_methods_failed" { @@ -165,7 +165,7 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" { ) * 100 > ${var.failed_c2d_methods_rate_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { warning = "${var.failed_c2d_methods_rate_threshold_warning}" @@ -183,7 +183,7 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] + tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] } resource "datadog_monitor" "too_many_c2d_twin_read_failed" { @@ -198,7 +198,7 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" { ) * 100 > ${var.failed_c2d_twin_read_rate_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { warning = "${var.failed_c2d_twin_read_rate_threshold_warning}" @@ -216,7 +216,7 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] + tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] } resource "datadog_monitor" "too_many_c2d_twin_update_failed" { @@ -231,7 +231,7 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" { ) * 100 > ${var.failed_c2d_twin_update_rate_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { warning = "${var.failed_c2d_twin_update_rate_threshold_warning}" @@ -249,7 +249,7 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}", "resource:${var.service}", 
"team:${var.provider}"] + tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] } resource "datadog_monitor" "too_many_d2c_twin_read_failed" { @@ -264,7 +264,7 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" { ) * 100 > ${var.failed_d2c_twin_read_rate_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { warning = "${var.failed_d2c_twin_read_rate_threshold_warning}" @@ -282,7 +282,7 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] + tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] } resource "datadog_monitor" "too_many_d2c_twin_update_failed" { @@ -297,7 +297,7 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" { ) * 100 > ${var.failed_d2c_twin_update_rate_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { warning = "${var.failed_d2c_twin_update_rate_threshold_warning}" @@ -315,7 +315,7 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] + tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] } resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { @@ -328,7 +328,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { ) > ${var.dropped_d2c_telemetry_egress_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { warning = "${var.dropped_d2c_telemetry_egress_threshold_warning}" @@ -346,7 +346,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] + tags = 
["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] } resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" { @@ -359,7 +359,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" { ) > ${var.orphaned_d2c_telemetry_egress_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { warning = "${var.orphaned_d2c_telemetry_egress_threshold_warning}" @@ -377,7 +377,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] + tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] } resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { @@ -390,7 +390,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { ) > ${var.invalid_d2c_telemetry_egress_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { warning = "${var.invalid_d2c_telemetry_egress_threshold_warning}" @@ -408,7 +408,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] + tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] } resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" { @@ -421,7 +421,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" { ) > ${var.fallback_d2c_telemetry_egress_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { warning = "${var.fallback_d2c_telemetry_egress_threshold_warning}" @@ -439,7 +439,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}", "resource:${var.service}", 
"team:${var.provider}"] + tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] } resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { @@ -453,7 +453,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { ) > 0 EOF - type = "query alert" + type = "metric alert" notify_no_data = false evaluation_delay = "${var.delay}" @@ -466,5 +466,5 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}", "resource:${var.service}", "team:${var.provider}"] + tags = ["env:${var.environment}", "resource:iothub", "team:azure", "provider:azure"] }