MON-271 improve service check

This commit is contained in:
Quentin Manfroi 2018-09-11 16:10:29 +02:00
parent 391240f597
commit 69e9f19a72
4 changed files with 54 additions and 39 deletions

View File

@ -104,7 +104,9 @@ Creates DataDog monitors with the following checks:
| not_responding_enabled | Flag to enable Redis does not respond monitor | string | `true` | no | | not_responding_enabled | Flag to enable Redis does not respond monitor | string | `true` | no |
| not_responding_extra_tags | Extra tags for Redis does not respond monitor | list | `<list>` | no | | not_responding_extra_tags | Extra tags for Redis does not respond monitor | list | `<list>` | no |
| not_responding_message | Custom message for Redis does not respond monitor | string | `` | no | | not_responding_message | Custom message for Redis does not respond monitor | string | `` | no |
| not_responding_no_data_timeframe | Redis does not respond monitor no data timeframe | string | `10` | no |
| not_responding_silenced | Groups to mute for Redis does not respond monitor | map | `<map>` | no | | not_responding_silenced | Groups to mute for Redis does not respond monitor | map | `<map>` | no |
| not_responding_threshold_warning | Redis does not respond monitor (warning threshold) | string | `3` | no |
| rejected_con_enabled | Flag to enable Redis rejected connections errors monitor | string | `true` | no | | rejected_con_enabled | Flag to enable Redis rejected connections errors monitor | string | `true` | no |
| rejected_con_extra_tags | Extra tags for Redis rejected connections errors monitor | list | `<list>` | no | | rejected_con_extra_tags | Extra tags for Redis rejected connections errors monitor | list | `<list>` | no |
| rejected_con_message | Custom message for Redis rejected connections errors monitor | string | `` | no | | rejected_con_message | Custom message for Redis rejected connections errors monitor | string | `` | no |

View File

@ -474,3 +474,15 @@ variable "not_responding_extra_tags" {
type = "list" type = "list"
default = [] default = []
} }
# Warning threshold for the "Redis does not respond" service-check monitor.
# Declared as a string; the default is the bare number 3.
variable "not_responding_threshold_warning" {
  type        = "string"
  default     = 3
  description = "Redis does not respond monitor (warning threshold)"
}
# No-data timeframe for the "Redis does not respond" service-check monitor.
# Declared as a string; the default is the bare number 10.
# NOTE(review): the unit is presumably minutes (Datadog convention) - confirm.
variable "not_responding_no_data_timeframe" {
  type        = "string"
  default     = 10
  description = "Redis does not respond monitor no data timeframe"
}

View File

@ -1,3 +1,38 @@
#
# Service Check
#
# Datadog "service check" monitor on the "redis.can_connect" check.
# Created only when var.not_responding_enabled is true (count is 1 or 0).
resource "datadog_monitor" "not_responding" {
count = "${var.not_responding_enabled ? 1 : 0}"
name = "[${var.environment}] Redis does not respond"
# Monitor-specific message first; falls back to the module-wide var.message.
message = "${coalesce(var.not_responding_message, var.message)}"
type = "service check"
# Query scope comes from the filter-tags module; results are grouped by
# redis_host and redis_port and evaluated over the last 6 check results.
# The heredoc body and its terminator are left untouched on purpose.
query = <<EOF
"redis.can_connect".over${module.filter-tags.service_check}.by("redis_host","redis_port").last(6).count_by_status()
EOF
# Warning threshold is configurable; critical is fixed at 5.
# NOTE(review): thresholds presumably count consecutive failing statuses
# within the last(6) window - confirm against the Datadog documentation.
thresholds {
warning = "${var.not_responding_threshold_warning}"
critical = 5
}
silenced = "${var.not_responding_silenced}"
# Notify when the check stops reporting, after the configured timeframe.
notify_no_data = true
no_data_timeframe = "${var.not_responding_no_data_timeframe}"
notify_audit = false
locked = false
timeout_h = 0
include_tags = true
require_full_window = true
renotify_interval = 0
new_host_delay = "${var.new_host_delay}"
# Fixed module tags followed by the caller-supplied extra tags list.
tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.not_responding_extra_tags}"]
}
resource "datadog_monitor" "evicted_keys" { resource "datadog_monitor" "evicted_keys" {
count = "${var.evictedkeys_change_enabled ? 1 : 0}" count = "${var.evictedkeys_change_enabled ? 1 : 0}"
name = "[${var.environment}] Redis evicted keys {{#is_alert}}{{{comparator}}} {{threshold}}% (+{{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% (+{{value}}%){{/is_warning}}" name = "[${var.environment}] Redis evicted keys {{#is_alert}}{{{comparator}}} {{threshold}}% (+{{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% (+{{value}}%){{/is_warning}}"
@ -298,37 +333,3 @@ EOL
tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.hitrate_extra_tags}"] tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.hitrate_extra_tags}"]
} }
#
# Service Check
#
# Datadog "service check" monitor on the "redis.can_connect" check.
# Created only when var.not_responding_enabled is true (count is 1 or 0).
# NOTE(review): this copy sits under a hunk that shrinks from 37 to 3 lines,
# so it appears to be the pre-change definition - confirm before relying on it.
resource "datadog_monitor" "not_responding" {
count = "${var.not_responding_enabled ? 1 : 0}"
name = "[${var.environment}] Redis does not respond"
# Monitor-specific message first; falls back to the module-wide var.message.
message = "${coalesce(var.not_responding_message, var.message)}"
type = "service check"
# Query scope comes from the filter-tags module; results are grouped by
# host, redis_host and redis_port and evaluated over the last 6 check results.
# The heredoc body and its terminator are left untouched on purpose.
query = <<EOF
"redis.can_connect".over${module.filter-tags.service_check}.by("host","redis_host","redis_port").last(6).count_by_status()
EOF
# Both thresholds are hard-coded here: ok at 1, critical at 5.
thresholds {
ok = 1
critical = 5
}
silenced = "${var.not_responding_silenced}"
notify_audit = false
locked = false
timeout_h = 0
include_tags = true
require_full_window = true
# No-data notification is enabled; no explicit no_data_timeframe is set here.
notify_no_data = true
renotify_interval = 0
new_host_delay = "${var.new_host_delay}"
# Fixed module tags followed by the caller-supplied extra tags list.
tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.not_responding_extra_tags}"]
}

View File

@ -1,3 +1,8 @@
# Exposes the id(s) of the not_responding monitor. The splat form is used
# because the resource is declared with a count.
output "not_responding_id" {
  value       = "${datadog_monitor.not_responding.*.id}"
  description = "id for monitor not_responding"
}
output "evicted_keys_id" { output "evicted_keys_id" {
description = "id for monitor evicted_keys" description = "id for monitor evicted_keys"
value = "${datadog_monitor.evicted_keys.*.id}" value = "${datadog_monitor.evicted_keys.*.id}"
@ -42,8 +47,3 @@ output "hitrate_id" {
description = "id for monitor hitrate" description = "id for monitor hitrate"
value = "${datadog_monitor.hitrate.*.id}" value = "${datadog_monitor.hitrate.*.id}"
} }
# Exposes the id(s) of the not_responding monitor via a splat expression
# over datadog_monitor.not_responding.
output "not_responding_id" {
  value       = "${datadog_monitor.not_responding.*.id}"
  description = "id for monitor not_responding"
}