Merged in MON-228-monitors-for-elasticsearch (pull request #152)

MON-228 improve service check

Approved-by: Rafael Romero Carmona <rafael.romero.carmona@fr.clara.net>
Approved-by: Quentin Manfroi <quentin.manfroi@yahoo.fr>
Approved-by: Jean-Philippe LAINÉ <jean-philippe.laine@fr.clara.net>
Approved-by: Adrien Broyere <adrien.broyere@fr.clara.net>
This commit is contained in:
Quentin Manfroi 2018-09-14 13:09:40 +00:00
commit 9497312f87
4 changed files with 99 additions and 111 deletions

View File

@ -170,13 +170,11 @@ Creates DataDog monitors with the following checks:
| node_free_space_threshold_warning | Cluster Status warning threshold | string | `20` | no | | node_free_space_threshold_warning | Cluster Status warning threshold | string | `20` | no |
| node_free_space_time_aggregator | Time aggregator for the Cluster Status monitor | string | `sum` | no | | node_free_space_time_aggregator | Time aggregator for the Cluster Status monitor | string | `sum` | no |
| node_free_space_timeframe | Timeframe for the Cluster Status monitor | string | `last_5m` | no | | node_free_space_timeframe | Timeframe for the Cluster Status monitor | string | `last_5m` | no |
| not_responding_by | Group by for the service check | string | `"port","server"` | no |
| not_responding_extra_tags | Extra tags for Elasticsearch does not respond monitor | list | `<list>` | no | | not_responding_extra_tags | Extra tags for Elasticsearch does not respond monitor | list | `<list>` | no |
| not_responding_last | Parameter 'last' for the service check | string | `1` | no |
| not_responding_message | Custom message for Elasticsearch does not respond monitor | string | `` | no | | not_responding_message | Custom message for Elasticsearch does not respond monitor | string | `` | no |
| not_responding_no_data_timeframe | Elasticsearch not responding monitor no data timeframe | string | `10` | no |
| not_responding_silenced | Groups to mute for Elasticsearch does not respond monitor | map | `<map>` | no | | not_responding_silenced | Groups to mute for Elasticsearch does not respond monitor | map | `<map>` | no |
| not_responding_threshold_critical | Not responding limit (critical threshold) | string | `5` | no | | not_responding_threshold_warning | Elasticsearch not responding limit (warning threshold) | string | `3` | no |
| not_responding_threshold_warning | Not responding limit (warning threshold) | string | `0` | no |
| query_cache_evictions_change_extra_tags | Extra tags for Cluster Status monitor | list | `<list>` | no | | query_cache_evictions_change_extra_tags | Extra tags for Cluster Status monitor | list | `<list>` | no |
| query_cache_evictions_change_message | Custom message for the Cluster Status monitor | string | `` | no | | query_cache_evictions_change_message | Custom message for the Cluster Status monitor | string | `` | no |
| query_cache_evictions_change_silenced | Groups to mute for Cluster Status monitor | map | `<map>` | no | | query_cache_evictions_change_silenced | Groups to mute for Cluster Status monitor | map | `<map>` | no |

View File

@ -1068,26 +1068,15 @@ variable "not_responding_message" {
default = "" default = ""
} }
# Tag keys the service check is grouped by. The value is one string in which
# every key carries its own escaped double quotes.
variable "not_responding_by" {
  type        = "string"
  default     = "\"port\",\"server\""
  description = "Group by for the service check"
}
# Value used for the service check's 'last' parameter. The default is written
# as a bare number even though the variable is declared with type "string".
variable "not_responding_last" {
  type        = "string"
  default     = 1
  description = "Parameter 'last' for the service check"
}
# Critical threshold for the "not responding" check. No explicit type is
# declared for this variable.
variable "not_responding_threshold_critical" {
  default     = 5
  description = "Not responding limit (critical threshold)"
}
variable "not_responding_threshold_warning" { variable "not_responding_threshold_warning" {
description = "Not responding limit (warning threshold)" description = "Elasticsearch not responding limit (warning threshold)"
default = 0 default = 3
}
variable "not_responding_no_data_timeframe" {
description = "Elasticsearch not responding monitor no data timeframe"
type = "string"
default = 10
} }
variable "not_responding_extra_tags" { variable "not_responding_extra_tags" {

View File

@ -1,3 +1,45 @@
#
# Service Check
#
# Service-check monitor on "elasticsearch.can_connect".
resource "datadog_monitor" "not_responding" {
  type = "service check"
  name = "[${var.environment}] ElasticSearch does not respond"

  # Monitor-specific message first, module-wide message as the fallback.
  message = "${coalesce(var.not_responding_message, var.message)}"

  # Grouped by server and port over the last 6 check runs; the scope comes
  # from the filter-tags module. The heredoc body is kept at column 0.
  query = <<EOL
"elasticsearch.can_connect".over${module.filter-tags.service_check}.by("server","port").last(6).count_by_status()
EOL

  # Only the warning level is configurable; the critical level is fixed here.
  thresholds {
    critical = 5
    warning  = "${var.not_responding_threshold_warning}"
  }

  # No-data handling.
  notify_no_data    = true
  no_data_timeframe = "${var.not_responding_no_data_timeframe}"

  # Evaluation and notification behaviour.
  new_host_delay      = "${var.new_host_delay}"
  require_full_window = true
  renotify_interval   = 0
  timeout_h           = 0
  include_tags        = true
  notify_audit        = false
  locked              = false
  silenced            = "${var.not_responding_silenced}"

  # Fixed tags, with the caller-supplied extra tags as the last element.
  tags = [
    "created-by:terraform",
    "team:claranet",
    "type:database",
    "provider:elasticsearch",
    "env:${var.environment}",
    "resource:elasticsearch",
    "${var.not_responding_extra_tags}",
  ]
}
# #
# Cluster Status Not Green # Cluster Status Not Green
# #
@ -23,7 +65,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -34,7 +76,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.cluster_status_not_green_extra_tags}", "${var.cluster_status_not_green_extra_tags}",
] ]
@ -64,7 +106,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -75,7 +117,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.cluster_initializing_shards_extra_tags}", "${var.cluster_initializing_shards_extra_tags}",
] ]
@ -105,7 +147,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -116,7 +158,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.cluster_relocating_shards_extra_tags}", "${var.cluster_relocating_shards_extra_tags}",
] ]
@ -146,7 +188,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -157,7 +199,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.cluster_unassigned_shards_extra_tags}", "${var.cluster_unassigned_shards_extra_tags}",
] ]
@ -190,7 +232,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -201,7 +243,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.node_free_space_extra_tags}", "${var.node_free_space_extra_tags}",
] ]
@ -231,7 +273,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -242,7 +284,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.jvm_heap_memory_usage_extra_tags}", "${var.jvm_heap_memory_usage_extra_tags}",
] ]
@ -272,7 +314,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -283,7 +325,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.jvm_memory_young_usage_extra_tags}", "${var.jvm_memory_young_usage_extra_tags}",
] ]
@ -313,7 +355,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -324,7 +366,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.jvm_memory_old_usage_extra_tags}", "${var.jvm_memory_old_usage_extra_tags}",
] ]
@ -354,7 +396,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -365,7 +407,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.jvm_gc_old_collection_latency_extra_tags}", "${var.jvm_gc_old_collection_latency_extra_tags}",
] ]
@ -395,7 +437,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -406,7 +448,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.jvm_gc_young_collection_latency_extra_tags}", "${var.jvm_gc_young_collection_latency_extra_tags}",
] ]
@ -437,7 +479,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -448,7 +490,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.indexing_latency_extra_tags}", "${var.indexing_latency_extra_tags}",
] ]
@ -479,7 +521,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -490,7 +532,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.flush_latency_extra_tags}", "${var.flush_latency_extra_tags}",
] ]
@ -528,7 +570,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -539,7 +581,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.http_connections_anomaly_extra_tags}", "${var.http_connections_anomaly_extra_tags}",
] ]
@ -570,7 +612,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -581,7 +623,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.search_query_latency_extra_tags}", "${var.search_query_latency_extra_tags}",
] ]
@ -612,7 +654,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -623,7 +665,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.fetch_latency_extra_tags}", "${var.fetch_latency_extra_tags}",
] ]
@ -653,7 +695,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -664,7 +706,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.search_query_change_extra_tags}", "${var.search_query_change_extra_tags}",
] ]
@ -694,7 +736,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -705,7 +747,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.fetch_change_extra_tags}", "${var.fetch_change_extra_tags}",
] ]
@ -736,7 +778,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -747,7 +789,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.field_data_evictions_change_extra_tags}", "${var.field_data_evictions_change_extra_tags}",
] ]
@ -778,7 +820,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -789,7 +831,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.query_cache_evictions_change_extra_tags}", "${var.query_cache_evictions_change_extra_tags}",
] ]
@ -820,7 +862,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -831,7 +873,7 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.request_cache_evictions_change_extra_tags}", "${var.request_cache_evictions_change_extra_tags}",
] ]
@ -861,7 +903,7 @@ EOF
locked = false locked = false
include_tags = true include_tags = true
require_full_window = true require_full_window = true
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.evaluation_delay}" evaluation_delay = "${var.evaluation_delay}"
@ -872,49 +914,8 @@ EOF
"env:${var.environment}", "env:${var.environment}",
"created-by:terraform", "created-by:terraform",
"team:claranet", "team:claranet",
"type:databases", "type:database",
"provider:elasticsearch", "provider:elasticsearch",
"${var.task_time_in_queue_change_extra_tags}", "${var.task_time_in_queue_change_extra_tags}",
] ]
} }
#
# Service Check
#
# Service-check monitor on "elasticsearch.can_connect" whose grouping, window
# and both thresholds are all driven by variables.
resource "datadog_monitor" "not_responding" {
  type = "service check"
  name = "[${var.environment}] ElasticSearch does not respond"

  # Monitor-specific message first, module-wide message as the fallback.
  message = "${coalesce(var.not_responding_message, var.message)}"

  # The group-by keys and the 'last' window are interpolated from variables;
  # the scope comes from the filter-tags module. The heredoc body is kept at
  # column 0.
  query = <<EOL
"elasticsearch.can_connect".over${module.filter-tags.service_check}.by(${var.not_responding_by}).last(${var.not_responding_last}).pct_by_status()
EOL

  thresholds {
    critical = "${var.not_responding_threshold_critical}"
    warning  = "${var.not_responding_threshold_warning}"
  }

  # Evaluation and notification behaviour.
  notify_no_data      = true
  new_host_delay      = "${var.new_host_delay}"
  require_full_window = true
  renotify_interval   = 0
  timeout_h           = 0
  include_tags        = true
  notify_audit        = false
  locked              = false
  silenced            = "${var.not_responding_silenced}"

  # Fixed tags, with the caller-supplied extra tags as the last element.
  tags = [
    "created-by:terraform",
    "team:claranet",
    "type:databases",
    "provider:elasticsearch",
    "env:${var.environment}",
    "resource:elasticsearch",
    "${var.not_responding_extra_tags}",
  ]
}

View File

@ -1,3 +1,8 @@
# Exports the id attribute of the not_responding monitor through a splat
# expression.
output "not_responding_id" {
  value       = "${datadog_monitor.not_responding.*.id}"
  description = "id for monitor not_responding"
}
output "cluster_status_not_green_id" { output "cluster_status_not_green_id" {
description = "id for monitor cluster_status_not_green" description = "id for monitor cluster_status_not_green"
value = "${datadog_monitor.cluster_status_not_green.*.id}" value = "${datadog_monitor.cluster_status_not_green.*.id}"
@ -102,8 +107,3 @@ output "task_time_in_queue_change_id" {
description = "id for monitor task_time_in_queue_change" description = "id for monitor task_time_in_queue_change"
value = "${datadog_monitor.task_time_in_queue_change.*.id}" value = "${datadog_monitor.task_time_in_queue_change.*.id}"
} }
# Exports the id attribute of the not_responding monitor through a splat
# expression.
output "not_responding_id" {
  value       = "${datadog_monitor.not_responding.*.id}"
  description = "id for monitor not_responding"
}