From fe659be1a9e667a517c0c6b9b52e431b0e888d5c Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Fri, 3 Aug 2018 15:51:33 +0200 Subject: [PATCH] MON-271 Added service check --- middleware/redis/README.md | 9 +++++++ middleware/redis/inputs.tf | 42 ++++++++++++++++++++++++++++++ middleware/redis/monitors-redis.tf | 40 ++++++++++++++++++++++++++++ middleware/redis/outputs.tf | 5 ++++ 4 files changed, 96 insertions(+) diff --git a/middleware/redis/README.md b/middleware/redis/README.md index 492fd66..0dd6acd 100644 --- a/middleware/redis/README.md +++ b/middleware/redis/README.md @@ -25,6 +25,7 @@ Creates DataDog monitors with the following checks: - Redis too many rejected connections - Redis latency is too high - Redis hitrate is too low +- Redis does not respond ## Inputs @@ -83,6 +84,13 @@ Creates DataDog monitors with the following checks: | mem_used_time_aggregator | Monitor aggregator for Redis RAM memory used [available values: min, max or avg] | string | `min` | no | | mem_used_timeframe | Monitor timeframe for Redis RAM memory used [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | +| not_responding_by | Group by for the service check | string | `"host","redis_host","redis_port"` | no | +| not_responding_last | Parameter 'last' for the service check | string | `6` | no | +| not_responding_message | Custom message for Redis does not respond monitor | string | `` | no | +| not_responding_silenced | Groups to mute for Redis does not respond monitor | map | `` | no | +| not_responding_threshold_critical | Not responding limit (critical threshold) | string | `5` | no | +| not_responding_threshold_ok | Not responding limit (ok threshold) | string | `1` | no | +| not_responding_threshold_warning | Not responding limit (warning threshold) | string | `1` | no | | redis_silenced | Groups to mute for Redis monitors | map | `` | no | | rejected_con_message | Custom message for Redis rejected connections errors monitor | string | `` | no | | rejected_con_silenced | Groups to mute for Redis rejected connections errors monitor | map | `` | no | @@ -103,6 +111,7 @@ Creates DataDog monitors with the following checks: | latency_id | id for monitor latency | | memory_frag_id | id for monitor memory_frag | | memory_used_id | id for monitor memory_used | +| not_responding_id | id for monitor not_responding | | rejected_connections_id | id for monitor rejected_connections | ## Related documentation diff --git a/middleware/redis/inputs.tf b/middleware/redis/inputs.tf index 527b5d4..1a4a1e3 100644 --- a/middleware/redis/inputs.tf +++ b/middleware/redis/inputs.tf @@ -339,3 +339,45 @@ variable "hitrate_threshold_warning" { description = "hitrate limit (warning threshold)" default = 30 } + +# +# Connection Down +# +variable "not_responding_silenced" { + description = "Groups to mute for Redis does not respond monitor" + type = "map" + default = {} +} + +variable "not_responding_message" { + description = "Custom message for Redis does not respond monitor" + type = "string" + default = "" +} + +variable "not_responding_by" { + description = "Group by for the service check" + type = "string" + default = "\"host\",\"redis_host\",\"redis_port\"" +} + +variable "not_responding_last" { + description = "Parameter 'last' for the service check" + type = "string" + default = 6 +} + +variable "not_responding_threshold_critical" { + description = "Not responding limit (critical threshold)" + default = 5 +} + +variable "not_responding_threshold_warning" { + description = "Not responding limit (warning threshold)" + default = 1 +} + +variable "not_responding_threshold_ok" { + description = "Not responding limit (ok threshold)" + default = 1 +} diff --git a/middleware/redis/monitors-redis.tf b/middleware/redis/monitors-redis.tf index 7556eb7..ce16da4 100644 --- a/middleware/redis/monitors-redis.tf +++ b/middleware/redis/monitors-redis.tf @@ -369,3 +369,43 @@ EOL "resource:redis", ] } + +# +# Service Check +# +resource "datadog_monitor" "not_responding" { + name = "[${var.environment}] Redis does not respond" + message = "${coalesce(var.not_responding_message, var.message)}" + + query = <