MON-78 Add datadog monitor for stream analytics
This commit is contained in:
parent
ac6e0d69e1
commit
e4e929ec1d
44
cloud/azure/stream-analytics/inputs.tf
Normal file
44
cloud/azure/stream-analytics/inputs.tf
Normal file
@ -0,0 +1,44 @@
|
||||
# Required inputs (no defaults): the caller must supply all three.

variable "hno_escalation_group" {
  description = "Notification handle inserted into every monitor message for both alert and recovery events"
}

# NOTE(review): no monitor in monitors-stream-analytics.tf references this
# variable; presumably the counterpart of hno_escalation_group for another
# on-call window - confirm before relying on it.
variable "ho_escalation_group" {
  description = "Notification handle for the HO escalation group"
}

variable "environment" {
  description = "Environment label used as the prefix of every monitor name"
}
|
||||
|
||||
# Passed unchanged to the notify_no_data argument of every monitor.
variable "notify_no_data" {
  description = "Whether the monitors notify when the metric stops reporting data"
  default     = "false"
}
|
||||
|
||||
# One value drives two monitor arguments: evaluation_delay and new_host_delay.
variable "delay" {
  description = "Delay applied to both evaluation_delay and new_host_delay of every monitor (seconds, per the Datadog provider)"
  default     = "600"
}
|
||||
|
||||
# Thresholds for the SU utilization monitor. The critical value is also
# interpolated into the monitor query and, followed by "%", into its name.

variable "su_utilization_warning" {
  description = "Warning threshold (percent) for the SU utilization monitor"
  default     = 60
}

variable "su_utilization_critical" {
  description = "Critical threshold (percent) for the SU utilization monitor; keep it above su_utilization_warning"
  default     = 80
}
|
||||
|
||||
# Thresholds for the failed function requests monitor. The critical value is
# also interpolated into the monitor query and name.

variable "failed_function_requests_warning" {
  description = "Warning threshold for the 5-minute average of failed function requests"
  default     = 0
}

variable "failed_function_requests_critical" {
  description = "Critical threshold for the 5-minute average of failed function requests; keep it above failed_function_requests_warning"
  default     = 10
}
|
||||
|
||||
# Thresholds for the conversion errors monitor. The critical value is also
# interpolated into the monitor query and name.

variable "conversion_errors_warning" {
  description = "Warning threshold for the 5-minute average of conversion errors"
  default     = 0
}

variable "conversion_errors_critical" {
  description = "Critical threshold for the 5-minute average of conversion errors; keep it above conversion_errors_warning"
  default     = 10
}
|
||||
|
||||
# Thresholds for the runtime errors monitor. The critical value is also
# interpolated into the monitor query and name.

variable "runtime_errors_warning" {
  description = "Warning threshold for the 5-minute average of runtime errors"
  default     = 0
}

# The critical default must stay strictly above the warning default: with both
# at 0 the warning band is empty, and Datadog is expected to refuse a ">"
# monitor whose warning threshold is not below its critical threshold.
# 10 matches the critical defaults of the other error-count monitors.
variable "runtime_errors_critical" {
  description = "Critical threshold for the 5-minute average of runtime errors; keep it above runtime_errors_warning"
  default     = 10
}
|
||||
92
cloud/azure/stream-analytics/monitors-stream-analytics.tf
Normal file
92
cloud/azure/stream-analytics/monitors-stream-analytics.tf
Normal file
@ -0,0 +1,92 @@
|
||||
# Query alert on the 5-minute average of
# azure.streamanalytics_streamingjobs.resource_utilization, evaluated
# separately for each (name, resource_group) group.
resource "datadog_monitor" "SU_utilization" {
  # The query groups by name and resource_group, not by host, so a host-based
  # template variable has nothing to resolve against; {{name.name}} renders the
  # value of the "name" tag of the group that triggered.
  name = "[${var.environment} SU utilization at more than ${var.su_utilization_critical}% on {{name.name}}]"

  # Only alert and recovery transitions notify the escalation group; the
  # warning state carries no notification handle.
  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}"

  # The value compared in the query is the same variable as thresholds.critical.
  query = "avg(last_5m):avg:azure.streamanalytics_streamingjobs.resource_utilization{*} by {name,resource_group} > ${var.su_utilization_critical}"
  type  = "query alert"

  notify_no_data      = "${var.notify_no_data}"
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 60
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  thresholds {
    warning  = "${var.su_utilization_warning}"
    critical = "${var.su_utilization_critical}"
  }
}
|
||||
|
||||
# Query alert on the 5-minute average of
# azure.streamanalytics_streamingjobs.aml_callout_failed_requests, evaluated
# separately for each (name, resource_group) group.
resource "datadog_monitor" "failed_function_requests" {
  # The query groups by name and resource_group, not by host, so a host-based
  # template variable has nothing to resolve against; {{name.name}} renders the
  # value of the "name" tag of the group that triggered.
  name = "[${var.environment} More than ${var.failed_function_requests_critical} failed function requests on {{name.name}}]"

  # Only alert and recovery transitions notify the escalation group; the
  # warning state carries no notification handle.
  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}"

  # The value compared in the query is the same variable as thresholds.critical.
  query = "avg(last_5m):avg:azure.streamanalytics_streamingjobs.aml_callout_failed_requests{*} by {name,resource_group} > ${var.failed_function_requests_critical}"
  type  = "query alert"

  notify_no_data      = "${var.notify_no_data}"
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 60
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  thresholds {
    warning  = "${var.failed_function_requests_warning}"
    critical = "${var.failed_function_requests_critical}"
  }
}
|
||||
|
||||
# Query alert on the 5-minute average of
# azure.streamanalytics_streamingjobs.conversion_errors, evaluated separately
# for each (name, resource_group) group.
resource "datadog_monitor" "conversion_errors" {
  # The query groups by name and resource_group, not by host, so a host-based
  # template variable has nothing to resolve against; {{name.name}} renders the
  # value of the "name" tag of the group that triggered.
  name = "[${var.environment} More than ${var.conversion_errors_critical} conversion errors on {{name.name}}]"

  # Only alert and recovery transitions notify the escalation group; the
  # warning state carries no notification handle.
  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}"

  # The value compared in the query is the same variable as thresholds.critical.
  query = "avg(last_5m):avg:azure.streamanalytics_streamingjobs.conversion_errors{*} by {name,resource_group} > ${var.conversion_errors_critical}"
  type  = "query alert"

  notify_no_data      = "${var.notify_no_data}"
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 60
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  thresholds {
    warning  = "${var.conversion_errors_warning}"
    critical = "${var.conversion_errors_critical}"
  }
}
|
||||
|
||||
# Query alert on the 5-minute average of
# azure.streamanalytics_streamingjobs.errors, evaluated separately for each
# (name, resource_group) group.
resource "datadog_monitor" "runtime_errors" {
  # The query groups by name and resource_group, not by host, so a host-based
  # template variable has nothing to resolve against; {{name.name}} renders the
  # value of the "name" tag of the group that triggered.
  name = "[${var.environment} More than ${var.runtime_errors_critical} runtime errors on {{name.name}}]"

  # Only alert and recovery transitions notify the escalation group; the
  # warning state carries no notification handle.
  message = "{{#is_alert}}\n${var.hno_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.hno_escalation_group}\n{{/is_recovery}}"

  # The value compared in the query is the same variable as thresholds.critical.
  query = "avg(last_5m):avg:azure.streamanalytics_streamingjobs.errors{*} by {name,resource_group} > ${var.runtime_errors_critical}"
  type  = "query alert"

  notify_no_data      = "${var.notify_no_data}"
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 60
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true
  new_host_delay      = "${var.delay}"
  no_data_timeframe   = 20

  thresholds {
    warning  = "${var.runtime_errors_warning}"
    critical = "${var.runtime_errors_critical}"
  }
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user