Merge branch 'MON-335-Fix-Azure-App-Services-HTTP-successful-responses-too-low-monitor' into 'master'

MON-335 - default of 100 added and division defaults fixed to 0

Closes MON-335

See merge request claranet/cloudnative/projects/datadog/terraform/monitors!7
Quentin Manfroi 2018-11-29 14:55:00 +01:00
commit d061bbb7d3
4 changed files with 66 additions and 49 deletions
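
The fix follows one pattern across all four files. Previously each series was given its own per-series default before the division, so a monitor receiving no data could still compute a ratio from placeholder values; for the App Services monitor this made an idle or unreported service look like a 0% HTTP success rate and fire a false alert (the MON-335 symptom). The new queries instead wrap the whole rate expression in a single outer default(): a success-rate monitor with no data now evaluates to a healthy 100, and the remaining inner defaults use 0 so gaps are no longer filled with fake traffic. A minimal before/after sketch of the success-rate form, using a hypothetical my.app.success / my.app.total metric pair, a last_5m timeframe, and a threshold of 90 in place of the module's variables:

Before - per-series defaults let "no data" evaluate as 0% success:

    avg(last_5m): (
    default(avg:my.app.success{*} by {name}.as_rate(), 0) /
    default(avg:my.app.total{*} by {name}.as_rate(), 1)
    ) * 100 < 90

After - one outer default; "no data" evaluates as 100%:

    avg(last_5m):
    default(
    avg:my.app.success{*} by {name}.as_rate() /
    avg:my.app.total{*} by {name}.as_rate()
    * 100
    , 100) < 90

The IoT Hub failure-rate monitors below get the mirror treatment: the expression is wrapped in default(..., 0) so a quiet hub reads as a 0% failure rate rather than producing no data, and notify_no_data flips from false to true so a metric that stops reporting entirely still surfaces as a no-data alert.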

View File

@@ -133,10 +133,12 @@ resource "datadog_monitor" "apimgt_successful_requests" {
 message = "${coalesce(var.successful_requests_message, var.message)}"
 query = <<EOF
-${var.successful_requests_time_aggregator}(${var.successful_requests_timeframe}): (
-default(avg:azure.apimanagement_service.successful_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1) /
-default(avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
-) * 100 < ${var.successful_requests_threshold_critical}
+${var.successful_requests_time_aggregator}(${var.successful_requests_timeframe}):
+default(
+avg:azure.apimanagement_service.successful_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate() /
+avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate()
+* 100
+, 100) < ${var.successful_requests_threshold_critical}
 EOF
 thresholds {

View File

@@ -136,11 +136,12 @@ resource "datadog_monitor" "appservices_http_success_status_rate" {
 message = "${coalesce(var.http_successful_requests_message, var.message)}"
 query = <<EOF
-${var.http_successful_requests_time_aggregator}(${var.http_successful_requests_timeframe}): ( (
-default(avg:azure.app_services.http2xx${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
-default(avg:azure.app_services.http3xx${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) ) /
-default(avg:azure.app_services.requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
-) * 100 < ${var.http_successful_requests_threshold_critical}
+${var.http_successful_requests_time_aggregator}(${var.http_successful_requests_timeframe}):
+default( (
+(default(avg:azure.app_services.http2xx${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
+default(avg:azure.app_services.http3xx${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) ) /
+default(avg:azure.app_services.requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0)
+) * 100, 100) < ${var.http_successful_requests_threshold_critical}
 EOF
 evaluation_delay = "${var.evaluation_delay}"

View File

@@ -4,11 +4,12 @@ resource "datadog_monitor" "too_many_jobs_failed" {
 message = "${coalesce(var.failed_jobs_rate_message, var.message)}"
 query = <<EOF
-${var.failed_jobs_rate_time_aggregator}(${var.failed_jobs_rate_timeframe}):(
+${var.failed_jobs_rate_time_aggregator}(${var.failed_jobs_rate_timeframe}):
+default(
 default(avg:azure.devices_iothubs.jobs.failed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
 default(avg:azure.devices_iothubs.jobs.failed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.jobs.completed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
-) * 100 > ${var.failed_jobs_rate_threshold_critical}
+* 100 , 0) > ${var.failed_jobs_rate_threshold_critical}
 EOF
 type = "metric alert"
@@ -20,7 +21,7 @@ resource "datadog_monitor" "too_many_jobs_failed" {
 silenced = "${var.failed_jobs_rate_silenced}"
-notify_no_data = false
+notify_no_data = true
 evaluation_delay = "${var.evaluation_delay}"
 renotify_interval = 0
 notify_audit = false
@@ -39,11 +40,12 @@ resource "datadog_monitor" "too_many_list_jobs_failed" {
 message = "${coalesce(var.failed_listjobs_rate_message, var.message)}"
 query = <<EOF
-${var.failed_listjobs_rate_time_aggregator}(${var.failed_listjobs_rate_timeframe}):(
+${var.failed_listjobs_rate_time_aggregator}(${var.failed_listjobs_rate_timeframe}):
+default(
 default(avg:azure.devices_iothubs.jobs.list_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) / (
 default(avg:azure.devices_iothubs.jobs.list_jobs.success{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.jobs.list_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) )
-) * 100 > ${var.failed_listjobs_rate_threshold_critical}
+* 100, 0) > ${var.failed_listjobs_rate_threshold_critical}
 EOF
 type = "metric alert"
@@ -55,7 +57,7 @@ resource "datadog_monitor" "too_many_list_jobs_failed" {
 silenced = "${var.failed_listjobs_rate_silenced}"
-notify_no_data = false
+notify_no_data = true
 evaluation_delay = "${var.evaluation_delay}"
 renotify_interval = 0
 notify_audit = false
@@ -74,11 +76,12 @@ resource "datadog_monitor" "too_many_query_jobs_failed" {
 message = "${coalesce(var.failed_queryjobs_rate_message, var.message)}"
 query = <<EOF
-${var.failed_queryjobs_rate_time_aggregator}(${var.failed_queryjobs_rate_timeframe}):(
+${var.failed_queryjobs_rate_time_aggregator}(${var.failed_queryjobs_rate_timeframe}):
+default(
 default(avg:azure.devices_iothubs.jobs.query_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) / (
 default(avg:azure.devices_iothubs.jobs.query_jobs.success{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.jobs.query_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) )
-) * 100 > ${var.failed_queryjobs_rate_threshold_critical}
+* 100, 0) > ${var.failed_queryjobs_rate_threshold_critical}
 EOF
 type = "metric alert"
@@ -90,7 +93,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" {
 silenced = "${var.failed_queryjobs_rate_silenced}"
-notify_no_data = false
+notify_no_data = true
 evaluation_delay = "${var.evaluation_delay}"
 renotify_interval = 0
 notify_audit = false
@@ -165,11 +168,12 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" {
 message = "${coalesce(var.failed_c2d_methods_rate_message, var.message)}"
 query = <<EOF
-${var.failed_c2d_methods_rate_time_aggregator}(${var.failed_c2d_methods_rate_timeframe}):(
+${var.failed_c2d_methods_rate_time_aggregator}(${var.failed_c2d_methods_rate_timeframe}):
+default(
 default(avg:azure.devices_iothubs.c2d.methods.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
 default(avg:azure.devices_iothubs.c2d.methods.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.c2d.methods.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
-) * 100 > ${var.failed_c2d_methods_rate_threshold_critical}
+* 100, 0) > ${var.failed_c2d_methods_rate_threshold_critical}
 EOF
 type = "metric alert"
@@ -181,7 +185,7 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" {
 silenced = "${var.failed_c2d_methods_rate_silenced}"
-notify_no_data = false
+notify_no_data = true
 evaluation_delay = "${var.evaluation_delay}"
 renotify_interval = 0
 notify_audit = false
@@ -200,11 +204,12 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
 message = "${coalesce(var.failed_c2d_twin_read_rate_message, var.message)}"
 query = <<EOF
-${var.failed_c2d_twin_read_rate_time_aggregator}(${var.failed_c2d_twin_read_rate_timeframe}):(
+${var.failed_c2d_twin_read_rate_time_aggregator}(${var.failed_c2d_twin_read_rate_timeframe}):
+default(
 default(avg:azure.devices_iothubs.c2d.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
 default(avg:azure.devices_iothubs.c2d.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.c2d.twin.read.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
-) * 100 > ${var.failed_c2d_twin_read_rate_threshold_critical}
+* 100, 0) > ${var.failed_c2d_twin_read_rate_threshold_critical}
 EOF
 type = "metric alert"
@@ -216,7 +221,7 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
 silenced = "${var.failed_c2d_twin_read_rate_silenced}"
-notify_no_data = false
+notify_no_data = true
 evaluation_delay = "${var.evaluation_delay}"
 renotify_interval = 0
 notify_audit = false
@@ -235,11 +240,12 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
 message = "${coalesce(var.failed_c2d_twin_update_rate_message, var.message)}"
 query = <<EOF
-${var.failed_c2d_twin_update_rate_time_aggregator}(${var.failed_c2d_twin_update_rate_timeframe}):(
+${var.failed_c2d_twin_update_rate_time_aggregator}(${var.failed_c2d_twin_update_rate_timeframe}):
+default(
 default(avg:azure.devices_iothubs.c2d.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
 default(avg:azure.devices_iothubs.c2d.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.c2d.twin.update.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
-) * 100 > ${var.failed_c2d_twin_update_rate_threshold_critical}
+* 100, 0) > ${var.failed_c2d_twin_update_rate_threshold_critical}
 EOF
 type = "metric alert"
@@ -251,7 +257,7 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
 silenced = "${var.failed_c2d_twin_update_rate_silenced}"
-notify_no_data = false
+notify_no_data = true
 evaluation_delay = "${var.evaluation_delay}"
 renotify_interval = 0
 notify_audit = false
@@ -270,11 +276,12 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
 message = "${coalesce(var.failed_d2c_twin_read_rate_message, var.message)}"
 query = <<EOF
-${var.failed_d2c_twin_read_rate_time_aggregator}(${var.failed_d2c_twin_read_rate_timeframe}):(
+${var.failed_d2c_twin_read_rate_time_aggregator}(${var.failed_d2c_twin_read_rate_timeframe}):
+default(
 default(avg:azure.devices_iothubs.d2c.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
 default(avg:azure.devices_iothubs.d2c.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.d2c.twin.read.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
-) * 100 > ${var.failed_d2c_twin_read_rate_threshold_critical}
+* 100, 0) > ${var.failed_d2c_twin_read_rate_threshold_critical}
 EOF
 type = "metric alert"
@@ -286,7 +293,7 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
 silenced = "${var.failed_d2c_twin_read_rate_silenced}"
-notify_no_data = false
+notify_no_data = true
 evaluation_delay = "${var.evaluation_delay}"
 renotify_interval = 0
 notify_audit = false
@@ -305,11 +312,12 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
 message = "${coalesce(var.failed_d2c_twin_update_rate_message, var.message)}"
 query = <<EOF
-${var.failed_d2c_twin_update_rate_time_aggregator}(${var.failed_d2c_twin_update_rate_timeframe}):(
+${var.failed_d2c_twin_update_rate_time_aggregator}(${var.failed_d2c_twin_update_rate_timeframe}):
+default(
 default(avg:azure.devices_iothubs.d2c.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
 default(avg:azure.devices_iothubs.d2c.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.d2c.twin.update.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
-) * 100 > ${var.failed_d2c_twin_update_rate_threshold_critical}
+* 100, 0) > ${var.failed_d2c_twin_update_rate_threshold_critical}
 EOF
 type = "metric alert"
@@ -321,7 +329,7 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
 silenced = "${var.failed_d2c_twin_update_rate_silenced}"
-notify_no_data = false
+notify_no_data = true
 evaluation_delay = "${var.evaluation_delay}"
 renotify_interval = 0
 notify_audit = false
@@ -340,13 +348,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
 message = "${coalesce(var.dropped_d2c_telemetry_egress_message, var.message)}"
 query = <<EOF
-${var.dropped_d2c_telemetry_egress_time_aggregator}(${var.dropped_d2c_telemetry_egress_timeframe}): (
+${var.dropped_d2c_telemetry_egress_time_aggregator}(${var.dropped_d2c_telemetry_egress_timeframe}):
+default(
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
-) * 100 > ${var.dropped_d2c_telemetry_egress_rate_threshold_critical}
+* 100, 0) > ${var.dropped_d2c_telemetry_egress_rate_threshold_critical}
 EOF
 type = "metric alert"
@@ -358,7 +367,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
 silenced = "${var.dropped_d2c_telemetry_egress_silenced}"
-notify_no_data = false
+notify_no_data = true
 evaluation_delay = "${var.evaluation_delay}"
 renotify_interval = 0
 notify_audit = false
@@ -377,13 +386,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
 message = "${coalesce(var.orphaned_d2c_telemetry_egress_message, var.message)}"
 query = <<EOF
-${var.orphaned_d2c_telemetry_egress_time_aggregator}(${var.orphaned_d2c_telemetry_egress_timeframe}): (
+${var.orphaned_d2c_telemetry_egress_time_aggregator}(${var.orphaned_d2c_telemetry_egress_timeframe}):
+default(
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
-) * 100 > ${var.orphaned_d2c_telemetry_egress_rate_threshold_critical}
+* 100, 0) > ${var.orphaned_d2c_telemetry_egress_rate_threshold_critical}
 EOF
 type = "metric alert"
@@ -395,7 +405,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
 silenced = "${var.orphaned_d2c_telemetry_egress_silenced}"
-notify_no_data = false
+notify_no_data = true
 evaluation_delay = "${var.evaluation_delay}"
 renotify_interval = 0
 notify_audit = false
@@ -414,13 +424,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
 message = "${coalesce(var.invalid_d2c_telemetry_egress_message, var.message)}"
 query = <<EOF
-${var.invalid_d2c_telemetry_egress_time_aggregator}(${var.invalid_d2c_telemetry_egress_timeframe}): (
+${var.invalid_d2c_telemetry_egress_time_aggregator}(${var.invalid_d2c_telemetry_egress_timeframe}):
+default(
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
 default(avg:azure.devices_iothubs.d2c.telemetry.egress.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
-) * 100 > ${var.invalid_d2c_telemetry_egress_rate_threshold_critical}
+* 100, 0) > ${var.invalid_d2c_telemetry_egress_rate_threshold_critical}
 EOF
 type = "metric alert"
@@ -432,7 +443,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
 silenced = "${var.invalid_d2c_telemetry_egress_silenced}"
-notify_no_data = false
+notify_no_data = true
 evaluation_delay = "${var.evaluation_delay}"
 renotify_interval = 0
 notify_audit = false
@@ -451,17 +462,18 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" {
 message = "${coalesce(var.too_many_d2c_telemetry_ingress_nosent_message, var.message)}"
 query = <<EOF
-sum(${var.too_many_d2c_telemetry_ingress_nosent_timeframe}): (
+sum(${var.too_many_d2c_telemetry_ingress_nosent_timeframe}):
+default(
 avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol{${var.filter_tags}} by {resource_group,region,name}.as_count() -
 avg:azure.devices_iothubs.d2c.telemetry.ingress.success{${var.filter_tags}} by {resource_group,region,name}.as_count()
-) > 0
+, 0) > 0
 EOF
 type = "metric alert"
 silenced = "${var.too_many_d2c_telemetry_ingress_nosent_silenced}"
-notify_no_data = false
+notify_no_data = true
 evaluation_delay = "${var.evaluation_delay}"
 renotify_interval = 0
 notify_audit = false

View File

@@ -34,10 +34,12 @@ resource "datadog_monitor" "keyvault_api_result" {
 message = "${coalesce(var.status_message, var.message)}"
 query = <<EOF
-${var.api_result_time_aggregator}(${var.api_result_timeframe}): (
-default(avg:azure.keyvault_vaults.service_api_result${format(module.filter-tags-statuscode.query_alert, "200")} by {name,resource_group,region}.as_rate(), 1) /
-default(avg:azure.keyvault_vaults.service_api_result${module.filter-tags.query_alert} by {name,resource_group,region}.as_rate(), 1)
-) * 100 < ${var.api_result_threshold_critical}
+${var.api_result_time_aggregator}(${var.api_result_timeframe}):
+default(
+avg:azure.keyvault_vaults.service_api_result${format(module.filter-tags-statuscode.query_alert, "200")} by {name,resource_group,region}.as_rate() /
+avg:azure.keyvault_vaults.service_api_result${module.filter-tags.query_alert} by {name,resource_group,region}.as_rate()
+* 100
+, 100) < ${var.api_result_threshold_critical}
 EOF
 thresholds {