Merge branch 'MON-512_fix_elasticsearch_latencies' into 'master'

Resolve MON-512 "Fix Elasticsearch latencies"

Closes MON-512

See merge request claranet/pt-monitoring/projects/datadog/terraform/monitors!123
This commit is contained in:
Quentin Manfroi 2019-10-18 13:33:06 +02:00
commit 800fe16141
3 changed files with 58 additions and 40 deletions

View File

@ -17,7 +17,7 @@ module "datadog-monitors-database-elasticsearch" {
Creates DataDog monitors with the following checks: Creates DataDog monitors with the following checks:
- Elasticsearch average index flushing to disk latency - Elasticsearch average index flushing to disk latency
- Elasticsearch average indexing time by document - Elasticsearch average indexing latency by document
- Elasticsearch average Old-generation garbage collections latency - Elasticsearch average Old-generation garbage collections latency
- Elasticsearch average search fetch latency - Elasticsearch average search fetch latency
- Elasticsearch average search query latency - Elasticsearch average search query latency
@ -84,10 +84,10 @@ Creates DataDog monitors with the following checks:
| fetch\_latency\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no | | fetch\_latency\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no |
| fetch\_latency\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no | | fetch\_latency\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no |
| fetch\_latency\_message | Custom message for the Cluster Status monitor | string | `""` | no | | fetch\_latency\_message | Custom message for the Cluster Status monitor | string | `""` | no |
| fetch\_latency\_threshold\_critical | Cluster Status critical threshold | string | `"4"` | no | | fetch\_latency\_threshold\_critical | Cluster Status critical threshold | string | `"20"` | no |
| fetch\_latency\_threshold\_warning | Cluster Status warning threshold | string | `"2"` | no | | fetch\_latency\_threshold\_warning | Cluster Status warning threshold | string | `"10"` | no |
| fetch\_latency\_time\_aggregator | Time aggregator for the Cluster Status monitor | string | `"min"` | no | | fetch\_latency\_time\_aggregator | Time aggregator for the Cluster Status monitor | string | `"min"` | no |
| fetch\_latency\_timeframe | Timeframe for the Cluster Status monitor | string | `"last_10m"` | no | | fetch\_latency\_timeframe | Timeframe for the Cluster Status monitor | string | `"last_15m"` | no |
| field\_data\_evictions\_change\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no | | field\_data\_evictions\_change\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no |
| field\_data\_evictions\_change\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no | | field\_data\_evictions\_change\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no |
| field\_data\_evictions\_change\_message | Custom message for the Cluster Status monitor | string | `""` | no | | field\_data\_evictions\_change\_message | Custom message for the Cluster Status monitor | string | `""` | no |
@ -102,10 +102,10 @@ Creates DataDog monitors with the following checks:
| flush\_latency\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no | | flush\_latency\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no |
| flush\_latency\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no | | flush\_latency\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no |
| flush\_latency\_message | Custom message for the Cluster Status monitor | string | `""` | no | | flush\_latency\_message | Custom message for the Cluster Status monitor | string | `""` | no |
| flush\_latency\_threshold\_critical | Cluster Status critical threshold | string | `"100"` | no | | flush\_latency\_threshold\_critical | Cluster Status critical threshold | string | `"150"` | no |
| flush\_latency\_threshold\_warning | Cluster Status warning threshold | string | `"50"` | no | | flush\_latency\_threshold\_warning | Cluster Status warning threshold | string | `"100"` | no |
| flush\_latency\_time\_aggregator | Time aggregator for the Cluster Status monitor | string | `"avg"` | no | | flush\_latency\_time\_aggregator | Time aggregator for the Cluster Status monitor | string | `"avg"` | no |
| flush\_latency\_timeframe | Timeframe for the Cluster Status monitor | string | `"last_10m"` | no | | flush\_latency\_timeframe | Timeframe for the Cluster Status monitor | string | `"last_15m"` | no |
| http\_connections\_anomaly\_alert\_window | Alert window. | string | `"last_15m"` | no | | http\_connections\_anomaly\_alert\_window | Alert window. | string | `"last_15m"` | no |
| http\_connections\_anomaly\_count\_default\_zero | Count default zero. | string | `"true"` | no | | http\_connections\_anomaly\_count\_default\_zero | Count default zero. | string | `"true"` | no |
| http\_connections\_anomaly\_detection\_algorithm | Anomaly Detection Algorithm used | string | `"agile"` | no | | http\_connections\_anomaly\_detection\_algorithm | Anomaly Detection Algorithm used | string | `"agile"` | no |
@ -123,24 +123,24 @@ Creates DataDog monitors with the following checks:
| indexing\_latency\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no | | indexing\_latency\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no |
| indexing\_latency\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no | | indexing\_latency\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no |
| indexing\_latency\_message | Custom message for the Cluster Status monitor | string | `""` | no | | indexing\_latency\_message | Custom message for the Cluster Status monitor | string | `""` | no |
| indexing\_latency\_threshold\_critical | Cluster Status critical threshold | string | `"15"` | no | | indexing\_latency\_threshold\_critical | Cluster Status critical threshold | string | `"30"` | no |
| indexing\_latency\_threshold\_warning | Cluster Status warning threshold | string | `"10"` | no | | indexing\_latency\_threshold\_warning | Cluster Status warning threshold | string | `"15"` | no |
| indexing\_latency\_time\_aggregator | Time aggregator for the Cluster Status monitor | string | `"avg"` | no | | indexing\_latency\_time\_aggregator | Time aggregator for the Cluster Status monitor | string | `"avg"` | no |
| indexing\_latency\_timeframe | Timeframe for the Cluster Status monitor | string | `"last_10m"` | no | | indexing\_latency\_timeframe | Timeframe for the Cluster Status monitor | string | `"last_10m"` | no |
| jvm\_gc\_old\_collection\_latency\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no | | jvm\_gc\_old\_collection\_latency\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no |
| jvm\_gc\_old\_collection\_latency\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no | | jvm\_gc\_old\_collection\_latency\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no |
| jvm\_gc\_old\_collection\_latency\_message | Custom message for the Cluster Status monitor | string | `""` | no | | jvm\_gc\_old\_collection\_latency\_message | Custom message for the Cluster Status monitor | string | `""` | no |
| jvm\_gc\_old\_collection\_latency\_threshold\_critical | Cluster Status critical threshold | string | `"200"` | no | | jvm\_gc\_old\_collection\_latency\_threshold\_critical | Cluster Status critical threshold | string | `"300"` | no |
| jvm\_gc\_old\_collection\_latency\_threshold\_warning | Cluster Status warning threshold | string | `"160"` | no | | jvm\_gc\_old\_collection\_latency\_threshold\_warning | Cluster Status warning threshold | string | `"200"` | no |
| jvm\_gc\_old\_collection\_latency\_time\_aggregator | Time aggregator for the Cluster Status monitor | string | `"avg"` | no | | jvm\_gc\_old\_collection\_latency\_time\_aggregator | Time aggregator for the Cluster Status monitor | string | `"avg"` | no |
| jvm\_gc\_old\_collection\_latency\_timeframe | Timeframe for the Cluster Status monitor | string | `"last_10m"` | no | | jvm\_gc\_old\_collection\_latency\_timeframe | Timeframe for the Cluster Status monitor | string | `"last_15m"` | no |
| jvm\_gc\_young\_collection\_latency\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no | | jvm\_gc\_young\_collection\_latency\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no |
| jvm\_gc\_young\_collection\_latency\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no | | jvm\_gc\_young\_collection\_latency\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no |
| jvm\_gc\_young\_collection\_latency\_message | Custom message for the Cluster Status monitor | string | `""` | no | | jvm\_gc\_young\_collection\_latency\_message | Custom message for the Cluster Status monitor | string | `""` | no |
| jvm\_gc\_young\_collection\_latency\_threshold\_critical | Cluster Status critical threshold | string | `"25"` | no | | jvm\_gc\_young\_collection\_latency\_threshold\_critical | Cluster Status critical threshold | string | `"40"` | no |
| jvm\_gc\_young\_collection\_latency\_threshold\_warning | Cluster Status warning threshold | string | `"20"` | no | | jvm\_gc\_young\_collection\_latency\_threshold\_warning | Cluster Status warning threshold | string | `"20"` | no |
| jvm\_gc\_young\_collection\_latency\_time\_aggregator | Time aggregator for the Cluster Status monitor | string | `"avg"` | no | | jvm\_gc\_young\_collection\_latency\_time\_aggregator | Time aggregator for the Cluster Status monitor | string | `"avg"` | no |
| jvm\_gc\_young\_collection\_latency\_timeframe | Timeframe for the Cluster Status monitor | string | `"last_10m"` | no | | jvm\_gc\_young\_collection\_latency\_timeframe | Timeframe for the Cluster Status monitor | string | `"last_15m"` | no |
| jvm\_heap\_memory\_usage\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no | | jvm\_heap\_memory\_usage\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no |
| jvm\_heap\_memory\_usage\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no | | jvm\_heap\_memory\_usage\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no |
| jvm\_heap\_memory\_usage\_message | Custom message for the Cluster Status monitor | string | `""` | no | | jvm\_heap\_memory\_usage\_message | Custom message for the Cluster Status monitor | string | `""` | no |
@ -204,10 +204,10 @@ Creates DataDog monitors with the following checks:
| search\_query\_latency\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no | | search\_query\_latency\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no |
| search\_query\_latency\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no | | search\_query\_latency\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no |
| search\_query\_latency\_message | Custom message for the Cluster Status monitor | string | `""` | no | | search\_query\_latency\_message | Custom message for the Cluster Status monitor | string | `""` | no |
| search\_query\_latency\_threshold\_critical | Cluster Status critical threshold | string | `"1"` | no | | search\_query\_latency\_threshold\_critical | Cluster Status critical threshold | string | `"20"` | no |
| search\_query\_latency\_threshold\_warning | Cluster Status warning threshold | string | `"0.5"` | no | | search\_query\_latency\_threshold\_warning | Cluster Status warning threshold | string | `"10"` | no |
| search\_query\_latency\_time\_aggregator | Time aggregator for the Cluster Status monitor | string | `"avg"` | no | | search\_query\_latency\_time\_aggregator | Time aggregator for the Cluster Status monitor | string | `"avg"` | no |
| search\_query\_latency\_timeframe | Timeframe for the Cluster Status monitor | string | `"last_10m"` | no | | search\_query\_latency\_timeframe | Timeframe for the Cluster Status monitor | string | `"last_15m"` | no |
| task\_time\_in\_queue\_change\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no | | task\_time\_in\_queue\_change\_enabled | Flag to enable Cluster Status monitor | string | `"true"` | no |
| task\_time\_in\_queue\_change\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no | | task\_time\_in\_queue\_change\_extra\_tags | Extra tags for Cluster Status monitor | list(string) | `[]` | no |
| task\_time\_in\_queue\_change\_message | Custom message for the Cluster Status monitor | string | `""` | no | | task\_time\_in\_queue\_change\_message | Custom message for the Cluster Status monitor | string | `""` | no |

View File

@ -418,19 +418,19 @@ variable "jvm_gc_old_collection_latency_time_aggregator" {
variable "jvm_gc_old_collection_latency_timeframe" { variable "jvm_gc_old_collection_latency_timeframe" {
description = "Timeframe for the Cluster Status monitor" description = "Timeframe for the Cluster Status monitor"
type = string type = string
default = "last_10m" default = "last_15m"
} }
variable "jvm_gc_old_collection_latency_threshold_warning" { variable "jvm_gc_old_collection_latency_threshold_warning" {
description = "Cluster Status warning threshold" description = "Cluster Status warning threshold"
type = string type = string
default = 160 default = 200
} }
variable "jvm_gc_old_collection_latency_threshold_critical" { variable "jvm_gc_old_collection_latency_threshold_critical" {
description = "Cluster Status critical threshold" description = "Cluster Status critical threshold"
type = string type = string
default = 200 default = 300
} }
variable "jvm_gc_old_collection_latency_enabled" { variable "jvm_gc_old_collection_latency_enabled" {
@ -463,7 +463,7 @@ variable "jvm_gc_young_collection_latency_time_aggregator" {
variable "jvm_gc_young_collection_latency_timeframe" { variable "jvm_gc_young_collection_latency_timeframe" {
description = "Timeframe for the Cluster Status monitor" description = "Timeframe for the Cluster Status monitor"
type = string type = string
default = "last_10m" default = "last_15m"
} }
variable "jvm_gc_young_collection_latency_threshold_warning" { variable "jvm_gc_young_collection_latency_threshold_warning" {
@ -475,7 +475,7 @@ variable "jvm_gc_young_collection_latency_threshold_warning" {
variable "jvm_gc_young_collection_latency_threshold_critical" { variable "jvm_gc_young_collection_latency_threshold_critical" {
description = "Cluster Status critical threshold" description = "Cluster Status critical threshold"
type = string type = string
default = 25 default = 40
} }
variable "jvm_gc_young_collection_latency_enabled" { variable "jvm_gc_young_collection_latency_enabled" {
@ -514,13 +514,13 @@ variable "indexing_latency_timeframe" {
variable "indexing_latency_threshold_warning" { variable "indexing_latency_threshold_warning" {
description = "Cluster Status warning threshold" description = "Cluster Status warning threshold"
type = string type = string
default = 10 default = 15
} }
variable "indexing_latency_threshold_critical" { variable "indexing_latency_threshold_critical" {
description = "Cluster Status critical threshold" description = "Cluster Status critical threshold"
type = string type = string
default = 15 default = 30
} }
variable "indexing_latency_enabled" { variable "indexing_latency_enabled" {
@ -553,19 +553,19 @@ variable "flush_latency_time_aggregator" {
variable "flush_latency_timeframe" { variable "flush_latency_timeframe" {
description = "Timeframe for the Cluster Status monitor" description = "Timeframe for the Cluster Status monitor"
type = string type = string
default = "last_10m" default = "last_15m"
} }
variable "flush_latency_threshold_warning" { variable "flush_latency_threshold_warning" {
description = "Cluster Status warning threshold" description = "Cluster Status warning threshold"
type = string type = string
default = 50 default = 100
} }
variable "flush_latency_threshold_critical" { variable "flush_latency_threshold_critical" {
description = "Cluster Status critical threshold" description = "Cluster Status critical threshold"
type = string type = string
default = 100 default = 150
} }
variable "flush_latency_enabled" { variable "flush_latency_enabled" {
@ -685,19 +685,19 @@ variable "search_query_latency_time_aggregator" {
variable "search_query_latency_timeframe" { variable "search_query_latency_timeframe" {
description = "Timeframe for the Cluster Status monitor" description = "Timeframe for the Cluster Status monitor"
type = string type = string
default = "last_10m" default = "last_15m"
} }
variable "search_query_latency_threshold_warning" { variable "search_query_latency_threshold_warning" {
description = "Cluster Status warning threshold" description = "Cluster Status warning threshold"
type = string type = string
default = 0.5 default = 10
} }
variable "search_query_latency_threshold_critical" { variable "search_query_latency_threshold_critical" {
description = "Cluster Status critical threshold" description = "Cluster Status critical threshold"
type = string type = string
default = 1 default = 20
} }
variable "search_query_latency_enabled" { variable "search_query_latency_enabled" {
@ -730,19 +730,19 @@ variable "fetch_latency_time_aggregator" {
variable "fetch_latency_timeframe" { variable "fetch_latency_timeframe" {
description = "Timeframe for the Cluster Status monitor" description = "Timeframe for the Cluster Status monitor"
type = string type = string
default = "last_10m" default = "last_15m"
} }
variable "fetch_latency_threshold_warning" { variable "fetch_latency_threshold_warning" {
description = "Cluster Status warning threshold" description = "Cluster Status warning threshold"
type = string type = string
default = 2 default = 10
} }
variable "fetch_latency_threshold_critical" { variable "fetch_latency_threshold_critical" {
description = "Cluster Status critical threshold" description = "Cluster Status critical threshold"
type = string type = string
default = 4 default = 20
} }
variable "fetch_latency_enabled" { variable "fetch_latency_enabled" {

View File

@ -329,7 +329,10 @@ resource "datadog_monitor" "jvm_gc_old_collection_latency" {
query = <<EOQ query = <<EOQ
${var.jvm_gc_old_collection_latency_time_aggregator}(${var.jvm_gc_old_collection_latency_timeframe}): ${var.jvm_gc_old_collection_latency_time_aggregator}(${var.jvm_gc_old_collection_latency_timeframe}):
avg:jvm.gc.collectors.old.collection_time${module.filter-tags.query_alert} by {node_name} / avg:jvm.gc.collectors.old.count${module.filter-tags.query_alert} by {node_name} * 1000 default(
diff(avg:jvm.gc.collectors.old.collection_time${module.filter-tags.query_alert} by {node_name}) /
diff(avg:jvm.gc.collectors.old.count${module.filter-tags.query_alert} by {node_name})
* 1000, 0)
> ${var.jvm_gc_old_collection_latency_threshold_critical} > ${var.jvm_gc_old_collection_latency_threshold_critical}
EOQ EOQ
@ -364,7 +367,10 @@ resource "datadog_monitor" "jvm_gc_young_collection_latency" {
query = <<EOQ query = <<EOQ
${var.jvm_gc_young_collection_latency_time_aggregator}(${var.jvm_gc_young_collection_latency_timeframe}): ${var.jvm_gc_young_collection_latency_time_aggregator}(${var.jvm_gc_young_collection_latency_timeframe}):
avg:jvm.gc.collectors.young.collection_time${module.filter-tags.query_alert} by {node_name} / avg:jvm.gc.collectors.young.count${module.filter-tags.query_alert} by {node_name} * 1000 default(
diff(avg:jvm.gc.collectors.young.collection_time${module.filter-tags.query_alert} by {node_name}) /
diff(avg:jvm.gc.collectors.young.count${module.filter-tags.query_alert} by {node_name})
* 1000, 0)
> ${var.jvm_gc_young_collection_latency_threshold_critical} > ${var.jvm_gc_young_collection_latency_threshold_critical}
EOQ EOQ
@ -393,14 +399,17 @@ EOQ
# #
resource "datadog_monitor" "indexing_latency" { resource "datadog_monitor" "indexing_latency" {
count = var.indexing_latency_enabled == "true" ? 1 : 0 count = var.indexing_latency_enabled == "true" ? 1 : 0
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticsearch average indexing time by document {{#is_alert}}{{{comparator}}} {{threshold}}ms ({{value}}ms){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}ms ({{value}}ms){{/is_warning}}" name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Elasticsearch average indexing latency by document {{#is_alert}}{{{comparator}}} {{threshold}}ms ({{value}}ms){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}ms ({{value}}ms){{/is_warning}}"
message = coalesce(var.indexing_latency_message, var.message) message = coalesce(var.indexing_latency_message, var.message)
type = "query alert" type = "query alert"
// TODO add tags to filter by node type and do not apply this monitor on non-data nodes // TODO add tags to filter by node type and do not apply this monitor on non-data nodes
query = <<EOQ query = <<EOQ
${var.indexing_latency_time_aggregator}(${var.indexing_latency_timeframe}): ${var.indexing_latency_time_aggregator}(${var.indexing_latency_timeframe}):
avg:elasticsearch.indexing.index.time${module.filter-tags.query_alert} by {node_name}/ avg:elasticsearch.indexing.index.total${module.filter-tags.query_alert} by {node_name} * 1000 default(
diff(avg:elasticsearch.indexing.index.time${module.filter-tags.query_alert} by {node_name}) /
diff(avg:elasticsearch.indexing.index.total${module.filter-tags.query_alert} by {node_name})
* 1000, 0)
> ${var.indexing_latency_threshold_critical} > ${var.indexing_latency_threshold_critical}
EOQ EOQ
@ -436,7 +445,10 @@ resource "datadog_monitor" "flush_latency" {
// TODO add tags to filter by node type and do not apply this monitor on non-data nodes // TODO add tags to filter by node type and do not apply this monitor on non-data nodes
query = <<EOQ query = <<EOQ
${var.flush_latency_time_aggregator}(${var.flush_latency_timeframe}): ${var.flush_latency_time_aggregator}(${var.flush_latency_timeframe}):
avg:elasticsearch.flush.total.time${module.filter-tags.query_alert} by {node_name} / avg:elasticsearch.flush.total${module.filter-tags.query_alert} by {node_name} * 1000 default(
diff(avg:elasticsearch.flush.total.time${module.filter-tags.query_alert} by {node_name}) /
diff(avg:elasticsearch.flush.total${module.filter-tags.query_alert} by {node_name})
* 1000, 0)
> ${var.flush_latency_threshold_critical} > ${var.flush_latency_threshold_critical}
EOQ EOQ
@ -520,7 +532,10 @@ resource "datadog_monitor" "search_query_latency" {
// TODO add tags to filter by node type and do not apply this monitor on non-data nodes // TODO add tags to filter by node type and do not apply this monitor on non-data nodes
query = <<EOQ query = <<EOQ
${var.search_query_latency_time_aggregator}(${var.search_query_latency_timeframe}): ${var.search_query_latency_time_aggregator}(${var.search_query_latency_timeframe}):
avg:elasticsearch.search.query.time${module.filter-tags.query_alert} by {node_name} / avg:elasticsearch.search.query.total${module.filter-tags.query_alert} by {node_name} * 1000 default(
diff(avg:elasticsearch.search.query.time${module.filter-tags.query_alert} by {node_name}) /
diff(avg:elasticsearch.search.query.total${module.filter-tags.query_alert} by {node_name})
* 1000, 0)
> ${var.search_query_latency_threshold_critical} > ${var.search_query_latency_threshold_critical}
EOQ EOQ
@ -556,7 +571,10 @@ resource "datadog_monitor" "fetch_latency" {
// TODO add tags to filter by node type and do not apply this monitor on non-data nodes // TODO add tags to filter by node type and do not apply this monitor on non-data nodes
query = <<EOQ query = <<EOQ
${var.fetch_latency_time_aggregator}(${var.fetch_latency_timeframe}): ${var.fetch_latency_time_aggregator}(${var.fetch_latency_timeframe}):
avg:elasticsearch.search.fetch.time${module.filter-tags.query_alert} by {node_name} / avg:elasticsearch.search.fetch.total${module.filter-tags.query_alert} by {node_name} * 1000 default(
diff(avg:elasticsearch.search.fetch.time${module.filter-tags.query_alert} by {node_name}) /
diff(avg:elasticsearch.search.fetch.total${module.filter-tags.query_alert} by {node_name})
* 1000, 0)
> ${var.fetch_latency_threshold_critical} > ${var.fetch_latency_threshold_critical}
EOQ EOQ