MON-271 Added service check
This commit is contained in:
parent
cfdfb64739
commit
fe659be1a9
@ -25,6 +25,7 @@ Creates DataDog monitors with the following checks:
|
|||||||
- Redis too many rejected connections
|
- Redis too many rejected connections
|
||||||
- Redis latency is too high
|
- Redis latency is too high
|
||||||
- Redis hitrate is too low
|
- Redis hitrate is too low
|
||||||
|
- Redis does not respond
|
||||||
|
|
||||||
## Inputs
|
## Inputs
|
||||||
|
|
||||||
@ -83,6 +84,13 @@ Creates DataDog monitors with the following checks:
|
|||||||
| mem_used_time_aggregator | Monitor aggregator for Redis RAM memory used [available values: min, max or avg] | string | `min` | no |
|
| mem_used_time_aggregator | Monitor aggregator for Redis RAM memory used [available values: min, max or avg] | string | `min` | no |
|
||||||
| mem_used_timeframe | Monitor timeframe for Redis RAM memory used [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
| mem_used_timeframe | Monitor timeframe for Redis RAM memory used [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||||
| message | Message sent when a Redis monitor is triggered | string | - | yes |
|
| message | Message sent when a Redis monitor is triggered | string | - | yes |
|
||||||
|
| not_responding_by | Group by for the service check | string | `"host","redis_host","redis_port"` | no |
|
||||||
|
| not_responding_last | Parameter 'last' for the service check | string | `6` | no |
|
||||||
|
| not_responding_message | Custom message for Redis does not respond monitor | string | `` | no |
|
||||||
|
| not_responding_silenced | Groups to mute for Redis does not respond monitor | map | `<map>` | no |
|
||||||
|
| not_responding_threshold_critical | Not responding limit (critical threshold) | string | `5` | no |
|
||||||
|
| not_responding_threshold_ok | Not responding limit (ok threshold) | string | `1` | no |
|
||||||
|
| not_responding_threshold_warning | Not responding limit (warning threshold) | string | `1` | no |
|
||||||
| redis_silenced | Groups to mute for Redis monitors | map | `<map>` | no |
|
| redis_silenced | Groups to mute for Redis monitors | map | `<map>` | no |
|
||||||
| rejected_con_message | Custom message for Redis rejected connections errors monitor | string | `` | no |
|
| rejected_con_message | Custom message for Redis rejected connections errors monitor | string | `` | no |
|
||||||
| rejected_con_silenced | Groups to mute for Redis rejected connections errors monitor | map | `<map>` | no |
|
| rejected_con_silenced | Groups to mute for Redis rejected connections errors monitor | map | `<map>` | no |
|
||||||
@ -103,6 +111,7 @@ Creates DataDog monitors with the following checks:
|
|||||||
| latency_id | id for monitor latency |
|
| latency_id | id for monitor latency |
|
||||||
| memory_frag_id | id for monitor memory_frag |
|
| memory_frag_id | id for monitor memory_frag |
|
||||||
| memory_used_id | id for monitor memory_used |
|
| memory_used_id | id for monitor memory_used |
|
||||||
|
| not_responding_id | id for monitor not_responding |
|
||||||
| rejected_connections_id | id for monitor rejected_connections |
|
| rejected_connections_id | id for monitor rejected_connections |
|
||||||
|
|
||||||
## Related documentation
|
## Related documentation
|
||||||
|
|||||||
@ -339,3 +339,45 @@ variable "hitrate_threshold_warning" {
|
|||||||
description = "hitrate limit (warning threshold)"
|
description = "hitrate limit (warning threshold)"
|
||||||
default = 30
|
default = 30
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Connection Down
|
||||||
|
#
|
||||||
|
# Mute map specific to the "Redis does not respond" monitor.
# Defaults to an empty map, i.e. nothing is muted.
variable "not_responding_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Redis does not respond monitor"
}
|
||||||
|
|
||||||
|
# Optional override of the notification text for the "Redis does not respond"
# monitor. Defaults to the empty string.
variable "not_responding_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Redis does not respond monitor"
}
|
||||||
|
|
||||||
|
# Tag names used to group the service check.
# The value is a single string holding a comma-separated list in which every
# tag name carries its own (escaped) double quotes.
variable "not_responding_by" {
  type        = "string"
  default     = "\"host\",\"redis_host\",\"redis_port\""
  description = "Group by for the service check"
}
|
||||||
|
|
||||||
|
# Value of the 'last' parameter of the service check.
# The variable is declared as a string, so the default is written as a quoted
# string literal to match the declared type (the rendered value is still 6).
variable "not_responding_last" {
  description = "Parameter 'last' for the service check"
  type        = "string"
  default     = "6"
}
|
||||||
|
|
||||||
|
# Critical threshold of the "not responding" monitor (default: 5).
variable "not_responding_threshold_critical" {
  default     = 5
  description = "Not responding limit (critical threshold)"
}
|
||||||
|
|
||||||
|
# Warning threshold of the "not responding" monitor (default: 1).
variable "not_responding_threshold_warning" {
  default     = 1
  description = "Not responding limit (warning threshold)"
}
|
||||||
|
|
||||||
|
# OK (recovery) threshold of the "not responding" monitor (default: 1).
variable "not_responding_threshold_ok" {
  default     = 1
  description = "Not responding limit (ok threshold)"
}
|
||||||
|
|||||||
@ -369,3 +369,43 @@ EOL
|
|||||||
"resource:redis",
|
"resource:redis",
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Service Check
|
||||||
|
#
|
||||||
|
# DataDog service-check monitor on "redis.can_connect", named
# "Redis does not respond" and prefixed with the environment.
resource "datadog_monitor" "not_responding" {
  type = "service check"
  name = "[${var.environment}] Redis does not respond"

  # Use the monitor-specific message when set, otherwise the shared message.
  message = "${coalesce(var.not_responding_message, var.message)}"

  # The rendered filter is a comma-separated string; replace() rewrites every
  # "," as "\",\"" so each filter element ends up individually quoted inside
  # over(...). The group-by list and the 'last' value come from variables.
  query = <<EOL
"redis.can_connect".over("${replace(data.template_file.filter.rendered, ",", "\",\"")}").by(${var.not_responding_by}).last(${var.not_responding_last}).count_by_status()
EOL

  # All three thresholds are driven by module inputs.
  thresholds {
    ok       = "${var.not_responding_threshold_ok}"
    warning  = "${var.not_responding_threshold_warning}"
    critical = "${var.not_responding_threshold_critical}"
  }

  # Groups muted for this monitor.
  silenced = "${var.not_responding_silenced}"

  # No-data handling.
  notify_no_data    = true
  no_data_timeframe = 2

  # Notification and evaluation behaviour.
  require_full_window = true
  renotify_interval   = 0
  notify_audit        = false
  include_tags        = true
  timeout_h           = 0
  locked              = false

  # Both delays share the same module-level delay input.
  new_host_delay   = "${var.delay}"
  evaluation_delay = "${var.delay}"

  tags = [
    "created_by:terraform",
    "env:${var.environment}",
    "resource:redis",
  ]
}
|
||||||
|
|||||||
@ -42,3 +42,8 @@ output "hitrate_id" {
|
|||||||
description = "id for monitor hitrate"
|
description = "id for monitor hitrate"
|
||||||
value = "${datadog_monitor.hitrate.id}"
|
value = "${datadog_monitor.hitrate.id}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Exposes the id of the "not_responding" DataDog monitor to module callers.
output "not_responding_id" {
  value       = "${datadog_monitor.not_responding.id}"
  description = "id for monitor not_responding"
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user