From 7f0a0e91cf6fdd3cb6ea5d33abcbf2dbdb41c0a2 Mon Sep 17 00:00:00 2001
From: Alexandre Gaillet
Date: Mon, 30 Oct 2017 15:21:40 +0100
Subject: [PATCH] MON-80 Rename variable for message alerting

---
 cloud/azure/iothubs/inputs.tf           | 8 +++++---
 cloud/azure/iothubs/monitors-iothubs.tf | 8 ++++----
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf
index ddc3456..5de7dab 100644
--- a/cloud/azure/iothubs/inputs.tf
+++ b/cloud/azure/iothubs/inputs.tf
@@ -1,10 +1,12 @@
-variable "hno_escalation_group" {}
+variable "critical_escalation_group" {}
 
-variable "ho_escalation_group" {}
+variable "warning_escalation_group" {}
 
 variable "environment" {}
 
-variable "subscription_id" {}
+variable "stack" {}
+
+variable "client_name" {}
 
 ## IOT hubs
 variable "delay" {
diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf
index 5f584db..e333808 100644
--- a/cloud/azure/iothubs/monitors-iothubs.tf
+++ b/cloud/azure/iothubs/monitors-iothubs.tf
@@ -1,6 +1,6 @@
 resource "datadog_monitor" "too_many_jobs_failed" {
   name    = "[${var.environment}] Too many jobs failed on {{name}} "
-  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}"
+  message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}"
   query   = "sum(last_5m):( avg:azure.devices_iothubs.jobs.failed{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.jobs.failed{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.jobs.completed{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.critical_jobs_failed}"
 
   type    = "query alert"
@@ -24,7 +24,7 @@ resource "datadog_monitor" "too_many_jobs_failed" {
 
 resource "datadog_monitor" "too_many_list_jobs_failed" {
   name    = "[${var.environment}] Too many list_jobs failure on {{name}} "
-  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}"
+  message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}"
   query   = "sum(last_5m):( avg:azure.devices_iothubs.jobs.list_jobs.failure{*} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.list_jobs.success{*} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.list_jobs.failure{*} by {resource_group,name}.as_count() ) ) * 100 > ${var.critical_listjobs_failed}"
 
   type    = "query alert"
@@ -48,7 +48,7 @@ resource "datadog_monitor" "too_many_list_jobs_failed" {
 
 resource "datadog_monitor" "too_many_query_jobs_failed" {
   name    = "[${var.environment}] Too many query_jobs failed on {{name}} "
-  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}"
+  message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}"
   query   = "sum(last_5m):( avg:azure.devices_iothubs.jobs.query_jobs.failure{*} by {resource_group,name}.as_count() / ( avg:azure.devices_iothubs.jobs.query_jobs.success{*} by {resource_group,name}.as_count() + avg:azure.devices_iothubs.jobs.query_jobs.failure{*} by {resource_group,name}.as_count() ) ) * 100 > ${var.critical_queryjobs_failed}"
 
   type    = "query alert"
@@ -72,7 +72,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" {
 
 resource "datadog_monitor" "status" {
   name    = "[${var.environment}] Status is not ok on {{name}} "
-  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}"
+  message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}"
   query   = "avg(last_5m):avg:azure.devices_iothubs.status{*} by {name,resource_group} < 1"
 
   type    = "query alert"