MON-228 improve service check
This commit is contained in:
parent
391240f597
commit
f0e252df33
@ -170,13 +170,11 @@ Creates DataDog monitors with the following checks:
|
|||||||
| node_free_space_threshold_warning | Cluster Status warning threshold | string | `20` | no |
|
| node_free_space_threshold_warning | Cluster Status warning threshold | string | `20` | no |
|
||||||
| node_free_space_time_aggregator | Time aggregator for the Cluster Status monitor | string | `sum` | no |
|
| node_free_space_time_aggregator | Time aggregator for the Cluster Status monitor | string | `sum` | no |
|
||||||
| node_free_space_timeframe | Timeframe for the Cluster Status monitor | string | `last_5m` | no |
|
| node_free_space_timeframe | Timeframe for the Cluster Status monitor | string | `last_5m` | no |
|
||||||
| not_responding_by | Group by for the service check | string | `"port","server"` | no |
|
|
||||||
| not_responding_extra_tags | Extra tags for Elasticsearch does not respond monitor | list | `<list>` | no |
|
| not_responding_extra_tags | Extra tags for Elasticsearch does not respond monitor | list | `<list>` | no |
|
||||||
| not_responding_last | Parameter 'last' for the service check | string | `1` | no |
|
|
||||||
| not_responding_message | Custom message for Elasticsearch does not respond monitor | string | `` | no |
|
| not_responding_message | Custom message for Elasticsearch does not respond monitor | string | `` | no |
|
||||||
|
| not_responding_no_data_timeframe | Elasticsearch not responding monitor no data timeframe | string | `10` | no |
|
||||||
| not_responding_silenced | Groups to mute for Elasticsearch does not respond monitor | map | `<map>` | no |
|
| not_responding_silenced | Groups to mute for Elasticsearch does not respond monitor | map | `<map>` | no |
|
||||||
| not_responding_threshold_critical | Not responding limit (critical threshold) | string | `5` | no |
|
| not_responding_threshold_warning | Elasticsearch not responding limit (warning threshold) | string | `3` | no |
|
||||||
| not_responding_threshold_warning | Not responding limit (warning threshold) | string | `0` | no |
|
|
||||||
| query_cache_evictions_change_extra_tags | Extra tags for Cluster Status monitor | list | `<list>` | no |
|
| query_cache_evictions_change_extra_tags | Extra tags for Cluster Status monitor | list | `<list>` | no |
|
||||||
| query_cache_evictions_change_message | Custom message for the Cluster Status monitor | string | `` | no |
|
| query_cache_evictions_change_message | Custom message for the Cluster Status monitor | string | `` | no |
|
||||||
| query_cache_evictions_change_silenced | Groups to mute for Cluster Status monitor | map | `<map>` | no |
|
| query_cache_evictions_change_silenced | Groups to mute for Cluster Status monitor | map | `<map>` | no |
|
||||||
|
|||||||
@ -1068,26 +1068,15 @@ variable "not_responding_message" {
|
|||||||
default = ""
|
default = ""
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "not_responding_by" {
|
|
||||||
description = "Group by for the service check"
|
|
||||||
type = "string"
|
|
||||||
default = "\"port\",\"server\""
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "not_responding_last" {
|
|
||||||
description = "Parameter 'last' for the service check"
|
|
||||||
type = "string"
|
|
||||||
default = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "not_responding_threshold_critical" {
|
|
||||||
description = "Not responding limit (critical threshold)"
|
|
||||||
default = 5
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "not_responding_threshold_warning" {
|
variable "not_responding_threshold_warning" {
|
||||||
description = "Not responding limit (warning threshold)"
|
description = "Elasticsearch not responding limit (warning threshold)"
|
||||||
default = 0
|
default = 3
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "not_responding_no_data_timeframe" {
|
||||||
|
description = "Elasticsearch not responding monitor no data timeframe"
|
||||||
|
type = "string"
|
||||||
|
default = 10
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "not_responding_extra_tags" {
|
variable "not_responding_extra_tags" {
|
||||||
|
|||||||
@ -1,3 +1,45 @@
|
|||||||
|
#
|
||||||
|
# Service Check
|
||||||
|
#
|
||||||
|
resource "datadog_monitor" "not_responding" {
|
||||||
|
name = "[${var.environment}] ElasticSearch does not respond"
|
||||||
|
message = "${coalesce(var.not_responding_message, var.message)}"
|
||||||
|
|
||||||
|
query = <<EOL
|
||||||
|
"elasticsearch.can_connect".over${module.filter-tags.service_check}.by("server","port").last(6).count_by_status()
|
||||||
|
EOL
|
||||||
|
|
||||||
|
type = "service check"
|
||||||
|
|
||||||
|
thresholds {
|
||||||
|
warning = "${var.not_responding_threshold_warning}"
|
||||||
|
critical = 5
|
||||||
|
}
|
||||||
|
|
||||||
|
silenced = "${var.not_responding_silenced}"
|
||||||
|
|
||||||
|
no_data_timeframe = "${var.not_responding_no_data_timeframe}"
|
||||||
|
notify_no_data = true
|
||||||
|
notify_audit = false
|
||||||
|
locked = false
|
||||||
|
timeout_h = 0
|
||||||
|
include_tags = true
|
||||||
|
require_full_window = true
|
||||||
|
renotify_interval = 0
|
||||||
|
|
||||||
|
new_host_delay = "${var.new_host_delay}"
|
||||||
|
|
||||||
|
tags = [
|
||||||
|
"created-by:terraform",
|
||||||
|
"team:claranet",
|
||||||
|
"type:databases",
|
||||||
|
"provider:elasticsearch",
|
||||||
|
"env:${var.environment}",
|
||||||
|
"resource:elasticsearch",
|
||||||
|
"${var.not_responding_extra_tags}",
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
#
|
#
|
||||||
# Cluster Status Not Green
|
# Cluster Status Not Green
|
||||||
#
|
#
|
||||||
@ -877,44 +919,3 @@ EOF
|
|||||||
"${var.task_time_in_queue_change_extra_tags}",
|
"${var.task_time_in_queue_change_extra_tags}",
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
#
|
|
||||||
# Service Check
|
|
||||||
#
|
|
||||||
resource "datadog_monitor" "not_responding" {
|
|
||||||
name = "[${var.environment}] ElasticSearch does not respond"
|
|
||||||
message = "${coalesce(var.not_responding_message, var.message)}"
|
|
||||||
|
|
||||||
query = <<EOL
|
|
||||||
"elasticsearch.can_connect".over${module.filter-tags.service_check}.by(${var.not_responding_by}).last(${var.not_responding_last}).pct_by_status()
|
|
||||||
EOL
|
|
||||||
|
|
||||||
type = "service check"
|
|
||||||
|
|
||||||
thresholds {
|
|
||||||
warning = "${var.not_responding_threshold_warning}"
|
|
||||||
critical = "${var.not_responding_threshold_critical}"
|
|
||||||
}
|
|
||||||
|
|
||||||
silenced = "${var.not_responding_silenced}"
|
|
||||||
|
|
||||||
notify_audit = false
|
|
||||||
locked = false
|
|
||||||
timeout_h = 0
|
|
||||||
include_tags = true
|
|
||||||
require_full_window = true
|
|
||||||
notify_no_data = true
|
|
||||||
renotify_interval = 0
|
|
||||||
|
|
||||||
new_host_delay = "${var.new_host_delay}"
|
|
||||||
|
|
||||||
tags = [
|
|
||||||
"created-by:terraform",
|
|
||||||
"team:claranet",
|
|
||||||
"type:databases",
|
|
||||||
"provider:elasticsearch",
|
|
||||||
"env:${var.environment}",
|
|
||||||
"resource:elasticsearch",
|
|
||||||
"${var.not_responding_extra_tags}",
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|||||||
@ -1,3 +1,8 @@
|
|||||||
|
output "not_responding_id" {
|
||||||
|
description = "id for monitor not_responding"
|
||||||
|
value = "${datadog_monitor.not_responding.*.id}"
|
||||||
|
}
|
||||||
|
|
||||||
output "cluster_status_not_green_id" {
|
output "cluster_status_not_green_id" {
|
||||||
description = "id for monitor cluster_status_not_green"
|
description = "id for monitor cluster_status_not_green"
|
||||||
value = "${datadog_monitor.cluster_status_not_green.*.id}"
|
value = "${datadog_monitor.cluster_status_not_green.*.id}"
|
||||||
@ -102,8 +107,3 @@ output "task_time_in_queue_change_id" {
|
|||||||
description = "id for monitor task_time_in_queue_change"
|
description = "id for monitor task_time_in_queue_change"
|
||||||
value = "${datadog_monitor.task_time_in_queue_change.*.id}"
|
value = "${datadog_monitor.task_time_in_queue_change.*.id}"
|
||||||
}
|
}
|
||||||
|
|
||||||
output "not_responding_id" {
|
|
||||||
description = "id for monitor not_responding"
|
|
||||||
value = "${datadog_monitor.not_responding.*.id}"
|
|
||||||
}
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user