MON-78 Add datadog monitor for stream analytics

This commit is contained in:
Jérémy NANCEL 2017-10-30 12:47:19 +01:00
parent ac6e0d69e1
commit e4e929ec1d
2 changed files with 136 additions and 0 deletions

View File

@ -0,0 +1,44 @@
# Notification handle interpolated into the alert and recovery sections of the
# Stream Analytics monitor messages. Required: no default is provided.
variable "hno_escalation_group" {
}

# Second notification handle. Required input, although none of the Stream
# Analytics monitors added alongside this file reference it.
variable "ho_escalation_group" {
}

# Environment label interpolated at the start of each monitor name.
variable "environment" {
}

# Forwarded unchanged to each monitor's notify_no_data setting.
variable "notify_no_data" {
  default = "false"
}

# Forwarded unchanged to both evaluation_delay and new_host_delay on each monitor.
variable "delay" {
  default = "600"
}
# Warning threshold for the SU utilization monitor.
variable "su_utilization_warning" {
  default = 60
}

# Critical threshold for the SU utilization monitor. The same value is
# interpolated into the monitor's query and its title (where it is shown as a %).
variable "su_utilization_critical" {
  default = 80
}
# Warning threshold for the failed function requests monitor.
variable "failed_function_requests_warning" {
  default = 0
}

# Critical threshold for the failed function requests monitor. The same value
# is interpolated into the monitor's query and its title.
variable "failed_function_requests_critical" {
  default = 10
}
# Warning threshold for the conversion errors monitor.
variable "conversion_errors_warning" {
  default = 0
}

# Critical threshold for the conversion errors monitor. The same value is
# interpolated into the monitor's query and its title.
variable "conversion_errors_critical" {
  default = 10
}
# Warning threshold for the runtime errors monitor.
variable "runtime_errors_warning" {
  default = 0
}

# Critical threshold for the runtime errors monitor. The same value is
# interpolated into the monitor's query and its title.
#
# Must stay strictly greater than runtime_errors_warning: the monitor compares
# with ">", and Datadog does not accept a warning threshold equal to the
# critical one. The default mirrors the other error-count monitors (0 / 10).
variable "runtime_errors_critical" {
  default = 10
}

View File

@ -0,0 +1,92 @@
# Monitors the 5-minute average of
# azure.streamanalytics_streamingjobs.resource_utilization and goes critical
# when it exceeds var.su_utilization_critical.
#
# The query is grouped by {name,resource_group}, so one alert is evaluated per
# group. Only the alert and recovery states mention var.hno_escalation_group;
# the warning threshold is configured but the message has no warning section.
resource "datadog_monitor" "SU_utilization" {
  # The group-by tags are name/resource_group (no host tag), so the title must
  # use the group's tag variable rather than a host variable.
  name    = "[${var.environment} SU utilization at more than ${var.su_utilization_critical}% on {{name.name}}]"
  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}"

  # The threshold in the query must match thresholds.critical below.
  query = "avg(last_5m):avg:azure.streamanalytics_streamingjobs.resource_utilization{*} by {name,resource_group} > ${var.su_utilization_critical}"
  type  = "query alert"

  notify_no_data      = "${var.notify_no_data}"
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 60
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  thresholds {
    warning  = "${var.su_utilization_warning}"
    critical = "${var.su_utilization_critical}"
  }
}
# Monitors the 5-minute average of
# azure.streamanalytics_streamingjobs.aml_callout_failed_requests and goes
# critical when it exceeds var.failed_function_requests_critical.
#
# The query is grouped by {name,resource_group}, so one alert is evaluated per
# group. Only the alert and recovery states mention var.hno_escalation_group;
# the warning threshold is configured but the message has no warning section.
resource "datadog_monitor" "failed_function_requests" {
  # The group-by tags are name/resource_group (no host tag), so the title must
  # use the group's tag variable rather than a host variable.
  name    = "[${var.environment} More than ${var.failed_function_requests_critical} failed function requests on {{name.name}}]"
  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}"

  # The threshold in the query must match thresholds.critical below.
  query = "avg(last_5m):avg:azure.streamanalytics_streamingjobs.aml_callout_failed_requests{*} by {name,resource_group} > ${var.failed_function_requests_critical}"
  type  = "query alert"

  notify_no_data      = "${var.notify_no_data}"
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 60
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  thresholds {
    warning  = "${var.failed_function_requests_warning}"
    critical = "${var.failed_function_requests_critical}"
  }
}
# Monitors the 5-minute average of
# azure.streamanalytics_streamingjobs.conversion_errors and goes critical when
# it exceeds var.conversion_errors_critical.
#
# The query is grouped by {name,resource_group}, so one alert is evaluated per
# group. Only the alert and recovery states mention var.hno_escalation_group;
# the warning threshold is configured but the message has no warning section.
resource "datadog_monitor" "conversion_errors" {
  # The group-by tags are name/resource_group (no host tag), so the title must
  # use the group's tag variable rather than a host variable.
  name    = "[${var.environment} More than ${var.conversion_errors_critical} conversion errors on {{name.name}}]"
  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}"

  # The threshold in the query must match thresholds.critical below.
  query = "avg(last_5m):avg:azure.streamanalytics_streamingjobs.conversion_errors{*} by {name,resource_group} > ${var.conversion_errors_critical}"
  type  = "query alert"

  notify_no_data      = "${var.notify_no_data}"
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 60
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  thresholds {
    warning  = "${var.conversion_errors_warning}"
    critical = "${var.conversion_errors_critical}"
  }
}
# Monitors the 5-minute average of azure.streamanalytics_streamingjobs.errors
# and goes critical when it exceeds var.runtime_errors_critical.
#
# The query is grouped by {name,resource_group}, so one alert is evaluated per
# group. Only the alert and recovery states mention var.hno_escalation_group;
# the warning threshold is configured but the message has no warning section.
resource "datadog_monitor" "runtime_errors" {
  # The group-by tags are name/resource_group (no host tag), so the title must
  # use the group's tag variable rather than a host variable.
  name    = "[${var.environment} More than ${var.runtime_errors_critical} runtime errors on {{name.name}}]"
  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}"

  # The threshold in the query must match thresholds.critical below.
  query = "avg(last_5m):avg:azure.streamanalytics_streamingjobs.errors{*} by {name,resource_group} > ${var.runtime_errors_critical}"
  type  = "query alert"

  notify_no_data      = "${var.notify_no_data}"
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 60
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  thresholds {
    warning  = "${var.runtime_errors_warning}"
    critical = "${var.runtime_errors_critical}"
  }
}