From 31aa187501afa09a93a29a62eb08a46e5856b9c2 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Tue, 23 Apr 2019 10:13:15 +0200 Subject: [PATCH] MON-326 harmonize queries EOQ end --- .../elasticsearch/monitors-elasticsearch.tf | 6 +-- .../aurora/mysql/monitors-rds-aurora-mysql.tf | 2 +- .../monitors-rds-aurora-postgresql.tf | 2 +- cloud/aws/rds/common/monitors-rds-common.tf | 6 +-- .../datalakestore/monitors-datalakestore.tf | 2 +- cloud/azure/redis/monitors-azure-redis.tf | 6 +-- .../azure/servicebus/monitors-service-bus.tf | 2 +- cloud/azure/storage/monitors-azure-storage.tf | 18 ++++---- cloud/gcp/big-query/monitors-big-query.tf | 18 ++++---- .../common/monitors-cloud-sql-common.tf | 12 ++--- .../mysql/monitors-cloudsql-mysql.tf | 2 +- .../gcp/gce/instance/monitors-gce-instance.tf | 6 +-- cloud/gcp/lb/monitors-lb.tf | 10 ++--- cloud/gcp/pubsub/monitors-pubsub.tf | 4 +- .../elasticsearch/monitors-elasticsearch.tf | 44 +++++++++---------- database/redis/monitors-redis.tf | 18 ++++---- 16 files changed, 79 insertions(+), 79 deletions(-) diff --git a/cloud/aws/elasticsearch/monitors-elasticsearch.tf b/cloud/aws/elasticsearch/monitors-elasticsearch.tf index d781840..263699c 100644 --- a/cloud/aws/elasticsearch/monitors-elasticsearch.tf +++ b/cloud/aws/elasticsearch/monitors-elasticsearch.tf @@ -15,7 +15,7 @@ resource "datadog_monitor" "es_cluster_status" { avg:aws.es.cluster_statusred${module.filter-tags.query_alert} by {region,name} * 2 + (avg:aws.es.cluster_statusyellow${module.filter-tags.query_alert} by {region,name} + 0.1) ) >= 2 -EOQ + EOQ thresholds { warning = 1 @@ -50,7 +50,7 @@ resource "datadog_monitor" "es_free_space_low" { avg:aws.es.free_storage_space${module.filter-tags.query_alert} by {region,name} / (${var.es_cluster_volume_size}*1000) * 100 ) < ${var.diskspace_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.diskspace_threshold_warning}" @@ -84,7 +84,7 @@ resource "datadog_monitor" "es_cpu_90_15min" { ${var.cpu_time_aggregator}(${var.cpu_timeframe}): ( avg:aws.es.cpuutilization${module.filter-tags.query_alert} by {region,name} ) > ${var.cpu_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.cpu_threshold_warning}" diff --git a/cloud/aws/rds/aurora/mysql/monitors-rds-aurora-mysql.tf b/cloud/aws/rds/aurora/mysql/monitors-rds-aurora-mysql.tf index 56e12a3..087ee64 100644 --- a/cloud/aws/rds/aurora/mysql/monitors-rds-aurora-mysql.tf +++ b/cloud/aws/rds/aurora/mysql/monitors-rds-aurora-mysql.tf @@ -10,7 +10,7 @@ resource "datadog_monitor" "rds_aurora_mysql_replica_lag" { avg(${var.aurora_replicalag_timeframe}): ( avg:aws.rds.aurora_replica_lag${module.filter-tags.query_alert} by {region,name} ) > ${var.aurora_replicalag_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.aurora_replicalag_threshold_warning}" diff --git a/cloud/aws/rds/aurora/postgresql/monitors-rds-aurora-postgresql.tf b/cloud/aws/rds/aurora/postgresql/monitors-rds-aurora-postgresql.tf index 2da7253..57248ac 100644 --- a/cloud/aws/rds/aurora/postgresql/monitors-rds-aurora-postgresql.tf +++ b/cloud/aws/rds/aurora/postgresql/monitors-rds-aurora-postgresql.tf @@ -10,7 +10,7 @@ resource "datadog_monitor" "rds_aurora_postgresql_replica_lag" { avg(${var.aurora_replicalag_timeframe}): ( avg:aws.rds.rdsto_aurora_postgre_sqlreplica_lag${module.filter-tags.query_alert} by {region,name} ) > ${var.aurora_replicalag_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.aurora_replicalag_threshold_warning}" diff --git a/cloud/aws/rds/common/monitors-rds-common.tf b/cloud/aws/rds/common/monitors-rds-common.tf index d78581b..369b896 100644 --- a/cloud/aws/rds/common/monitors-rds-common.tf +++ b/cloud/aws/rds/common/monitors-rds-common.tf @@ -10,7 +10,7 @@ resource "datadog_monitor" "rds_cpu_90_15min" { ${var.cpu_time_aggregator}(${var.cpu_timeframe}): ( avg:aws.rds.cpuutilization${module.filter-tags.query_alert} by {region,name} ) > ${var.cpu_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.cpu_threshold_warning}" @@ -44,7 +44,7 @@ resource "datadog_monitor" "rds_free_space_low" { avg:aws.rds.free_storage_space${module.filter-tags.query_alert} by {region,name} / avg:aws.rds.total_storage_space${module.filter-tags.query_alert} by {region,name} * 100 ) < ${var.diskspace_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.diskspace_threshold_warning}" @@ -77,7 +77,7 @@ resource "datadog_monitor" "rds_replica_lag" { avg(${var.replicalag_timeframe}): ( avg:aws.rds.replica_lag${module.filter-tags.query_alert} by {region,name} ) > ${var.replicalag_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.replicalag_threshold_warning}" diff --git a/cloud/azure/datalakestore/monitors-datalakestore.tf b/cloud/azure/datalakestore/monitors-datalakestore.tf index 721fb75..82fc3ec 100644 --- a/cloud/azure/datalakestore/monitors-datalakestore.tf +++ b/cloud/azure/datalakestore/monitors-datalakestore.tf @@ -8,7 +8,7 @@ resource "datadog_monitor" "datalakestore_status" { ${var.status_time_aggregator}(${var.status_timeframe}): ( avg:azure.datalakestore_accounts.status${module.filter-tags.query_alert} by {resource_group,region,name} ) < 1 -EOQ + EOQ type = "metric alert" diff --git a/cloud/azure/redis/monitors-azure-redis.tf b/cloud/azure/redis/monitors-azure-redis.tf index 2d60d72..fd3242a 100644 --- a/cloud/azure/redis/monitors-azure-redis.tf +++ b/cloud/azure/redis/monitors-azure-redis.tf @@ -35,7 +35,7 @@ resource "datadog_monitor" "evictedkeys" { ${var.evictedkeys_limit_time_aggregator}(${var.evictedkeys_limit_timeframe}): ( avg:azure.cache_redis.evictedkeys${module.filter-tags.query_alert} by {resource_group,region,name} ) > ${var.evictedkeys_limit_threshold_critical} -EOQ + EOQ type = "metric alert" @@ -68,7 +68,7 @@ resource "datadog_monitor" "percent_processor_time" { ${var.percent_processor_time_time_aggregator}(${var.percent_processor_time_timeframe}): ( avg:azure.cache_redis.percent_processor_time${module.filter-tags.query_alert} by {resource_group,region,name} ) > ${var.percent_processor_time_threshold_critical} -EOQ + EOQ type = "metric alert" @@ -101,7 +101,7 @@ resource "datadog_monitor" "server_load" { ${var.server_load_rate_time_aggregator}(${var.server_load_rate_timeframe}): ( avg:azure.cache_redis.server_load${module.filter-tags.query_alert} by {resource_group,region,name} ) > ${var.server_load_rate_threshold_critical} -EOQ + EOQ type = "metric alert" diff --git a/cloud/azure/servicebus/monitors-service-bus.tf b/cloud/azure/servicebus/monitors-service-bus.tf index c0a004b..f1bb8b4 100644 --- a/cloud/azure/servicebus/monitors-service-bus.tf +++ b/cloud/azure/servicebus/monitors-service-bus.tf @@ -8,7 +8,7 @@ resource "datadog_monitor" "servicebus_status" { ${var.status_time_aggregator}(${var.status_timeframe}): ( avg:azure.servicebus_namespaces.status${module.filter-tags.query_alert} by {resource_group,region,name} ) != 1 -EOQ + EOQ type = "metric alert" diff --git a/cloud/azure/storage/monitors-azure-storage.tf b/cloud/azure/storage/monitors-azure-storage.tf index 5928a55..7c027f8 100644 --- a/cloud/azure/storage/monitors-azure-storage.tf +++ b/cloud/azure/storage/monitors-azure-storage.tf @@ -7,7 +7,7 @@ resource "datadog_monitor" "availability" { ${var.availability_time_aggregator}(${var.availability_timeframe}): (default( avg:azure.storage.availability${module.filter-tags.query_alert} by {resource_group,storage_type,name}, 100)) < ${var.availability_threshold_critical} -EOQ + EOQ thresholds { critical = "${var.availability_threshold_critical}" @@ -39,7 +39,7 @@ resource "datadog_monitor" "successful_requests" { ${var.successful_requests_time_aggregator}(${var.successful_requests_timeframe}): (default( avg:azure.storage.percent_success${module.filter-tags.query_alert} by {resource_group,storage_type,name}, 100)) < ${var.successful_requests_threshold_critical} -EOQ + EOQ thresholds { critical = "${var.successful_requests_threshold_critical}" @@ -71,7 +71,7 @@ resource "datadog_monitor" "latency" { ${var.latency_time_aggregator}(${var.latency_timeframe}): (default( avg:azure.storage.average_e2_e_latency${module.filter-tags.query_alert} by {resource_group,storage_type,name}, 0)) > ${var.latency_threshold_critical} -EOQ + EOQ thresholds { critical = "${var.latency_threshold_critical}" @@ -103,7 +103,7 @@ resource "datadog_monitor" "timeout_error_requests" { ${var.timeout_error_requests_time_aggregator}(${var.timeout_error_requests_timeframe}): (default( avg:azure.storage.percent_timeout_error${module.filter-tags.query_alert} by {resource_group,storage_type,name}, 0)) > ${var.timeout_error_requests_threshold_critical} -EOQ + EOQ thresholds { critical = "${var.timeout_error_requests_threshold_critical}" @@ -135,7 +135,7 @@ resource "datadog_monitor" "network_error_requests" { ${var.network_error_requests_time_aggregator}(${var.network_error_requests_timeframe}): (default( avg:azure.storage.percent_network_error${module.filter-tags.query_alert} by {resource_group,storage_type,name}, 0)) > ${var.network_error_requests_threshold_critical} -EOQ + EOQ thresholds { critical = "${var.network_error_requests_threshold_critical}" @@ -167,7 +167,7 @@ resource "datadog_monitor" "throttling_error_requests" { ${var.throttling_error_requests_time_aggregator}(${var.throttling_error_requests_timeframe}): (default( avg:azure.storage.percent_throttling_error${module.filter-tags.query_alert} by {resource_group,storage_type,name}, 0)) > ${var.throttling_error_requests_threshold_critical} -EOQ + EOQ thresholds { critical = "${var.throttling_error_requests_threshold_critical}" @@ -199,7 +199,7 @@ resource "datadog_monitor" "server_other_error_requests" { ${var.server_other_error_requests_time_aggregator}(${var.server_other_error_requests_timeframe}): (default( avg:azure.storage.percent_server_other_error${module.filter-tags.query_alert} by {resource_group,storage_type,name}, 0)) > ${var.server_other_error_requests_threshold_critical} -EOQ + EOQ thresholds { critical = "${var.server_other_error_requests_threshold_critical}" @@ -231,7 +231,7 @@ resource "datadog_monitor" "client_other_error_requests" { ${var.client_other_error_requests_time_aggregator}(${var.client_other_error_requests_timeframe}): (default( avg:azure.storage.percent_client_other_error${module.filter-tags.query_alert} by {resource_group,storage_type,name}, 0)) > ${var.client_other_error_requests_threshold_critical} -EOQ + EOQ thresholds { critical = "${var.client_other_error_requests_threshold_critical}" @@ -263,7 +263,7 @@ resource "datadog_monitor" "authorization_error_requests" { ${var.authorization_error_requests_time_aggregator}(${var.authorization_error_requests_timeframe}): (default( avg:azure.storage.percent_authorization_error${module.filter-tags.query_alert} by {resource_group,storage_type,name}, 0)) > ${var.authorization_error_requests_threshold_critical} -EOQ + EOQ thresholds { critical = "${var.authorization_error_requests_threshold_critical}" diff --git a/cloud/gcp/big-query/monitors-big-query.tf b/cloud/gcp/big-query/monitors-big-query.tf index 1352b95..3768c14 100644 --- a/cloud/gcp/big-query/monitors-big-query.tf +++ b/cloud/gcp/big-query/monitors-big-query.tf @@ -11,7 +11,7 @@ resource "datadog_monitor" "concurrent_queries" { query = < ${var.concurrent_queries_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.concurrent_queries_threshold_warning}" @@ -47,7 +47,7 @@ resource "datadog_monitor" "execution_time" { query = < ${var.execution_time_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.execution_time_threshold_warning}" @@ -83,7 +83,7 @@ resource "datadog_monitor" "scanned_bytes" { query = < ${var.scanned_bytes_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.scanned_bytes_threshold_warning}" @@ -119,7 +119,7 @@ resource "datadog_monitor" "scanned_bytes_billed" { query = < ${var.scanned_bytes_billed_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.scanned_bytes_billed_threshold_warning}" @@ -155,7 +155,7 @@ resource "datadog_monitor" "available_slots" { query = < ${var.stored_bytes_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.stored_bytes_threshold_warning}" @@ -227,7 +227,7 @@ resource "datadog_monitor" "table_count" { query = < ${var.table_count_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.table_count_threshold_warning}" @@ -263,7 +263,7 @@ resource "datadog_monitor" "uploaded_bytes" { query = < ${var.uploaded_bytes_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.uploaded_bytes_threshold_warning}" @@ -299,7 +299,7 @@ resource "datadog_monitor" "uploaded_bytes_billed" { query = < ${var.uploaded_bytes_billed_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.uploaded_bytes_billed_threshold_warning}" diff --git a/cloud/gcp/cloud-sql/common/monitors-cloud-sql-common.tf b/cloud/gcp/cloud-sql/common/monitors-cloud-sql-common.tf index 74ebd20..6069ab7 100644 --- a/cloud/gcp/cloud-sql/common/monitors-cloud-sql-common.tf +++ b/cloud/gcp/cloud-sql/common/monitors-cloud-sql-common.tf @@ -13,7 +13,7 @@ resource "datadog_monitor" "cpu_utilization" { avg:gcp.cloudsql.database.cpu.utilization{${var.filter_tags}} by {database_id} * 100 > ${var.cpu_utilization_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.cpu_utilization_threshold_warning}" @@ -51,7 +51,7 @@ resource "datadog_monitor" "disk_utilization" { avg:gcp.cloudsql.database.disk.utilization{${var.filter_tags}} by {database_id} * 100 > ${var.disk_utilization_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.disk_utilization_threshold_warning}" @@ -95,7 +95,7 @@ resource "datadog_monitor" "disk_utilization_forecast" { ${var.disk_utilization_forecast_algorithm == "seasonal" ? format("seasonality='%s'", var.disk_utilization_forecast_seasonal_seasonality): ""} ) >= ${var.disk_utilization_forecast_threshold_critical} -EOQ + EOQ thresholds { critical = "${var.disk_utilization_forecast_threshold_critical}" @@ -133,7 +133,7 @@ resource "datadog_monitor" "memory_utilization" { avg:gcp.cloudsql.database.memory.utilization{${var.filter_tags}} by {database_id} * 100 > ${var.memory_utilization_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.memory_utilization_threshold_warning}" @@ -177,7 +177,7 @@ resource "datadog_monitor" "memory_utilization_forecast" { ${var.memory_utilization_forecast_algorithm == "seasonal" ? format("seasonality='%s'", var.memory_utilization_forecast_seasonal_seasonality): ""} ) >= ${var.memory_utilization_forecast_threshold_critical} -EOQ + EOQ thresholds { critical = "${var.memory_utilization_forecast_threshold_critical}" @@ -215,7 +215,7 @@ resource "datadog_monitor" "failover_unavailable" { avg:gcp.cloudsql.database.available_for_failover{${var.filter_tags}} by {database_id} <= ${var.failover_unavailable_threshold_critical} -EOQ + EOQ thresholds { critical = "${var.failover_unavailable_threshold_critical}" diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index 9717cf3..938a6c5 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -13,7 +13,7 @@ resource "datadog_monitor" "replication_lag" { avg:gcp.cloudsql.database.mysql.replication.seconds_behind_master{${var.filter_tags}} by {database_id} > ${var.replication_lag_threshold_critical} -EOQ + EOQ thresholds { critical = "${var.replication_lag_threshold_critical}" diff --git a/cloud/gcp/gce/instance/monitors-gce-instance.tf b/cloud/gcp/gce/instance/monitors-gce-instance.tf index 4419819..b3d1033 100644 --- a/cloud/gcp/gce/instance/monitors-gce-instance.tf +++ b/cloud/gcp/gce/instance/monitors-gce-instance.tf @@ -12,7 +12,7 @@ resource "datadog_monitor" "cpu_utilization" { ${var.cpu_utilization_time_aggregator}(${var.cpu_utilization_timeframe}): avg:gcp.gce.instance.cpu.utilization{${var.filter_tags}} by {instance_name} * 100 > ${var.cpu_utilization_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.cpu_utilization_threshold_warning}" @@ -55,7 +55,7 @@ resource "datadog_monitor" "disk_throttled_bps" { sum:gcp.gce.instance.disk.write_bytes_count{${var.filter_tags}} by {instance_name, device_name} ) * 100 > ${var.disk_throttled_bps_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.disk_throttled_bps_threshold_warning}" @@ -98,7 +98,7 @@ resource "datadog_monitor" "disk_throttled_ops" { sum:gcp.gce.instance.disk.write_ops_count{${var.filter_tags}} by {instance_name, device_name} ) * 100 > ${var.disk_throttled_ops_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.disk_throttled_ops_threshold_warning}" diff --git a/cloud/gcp/lb/monitors-lb.tf b/cloud/gcp/lb/monitors-lb.tf index 835449c..bbef18e 100644 --- a/cloud/gcp/lb/monitors-lb.tf +++ b/cloud/gcp/lb/monitors-lb.tf @@ -13,7 +13,7 @@ resource "datadog_monitor" "error_rate_4xx" { default(sum:gcp.loadbalancing.https.request_count{${var.filter_tags},response_code_class:400} by {forwarding_rule_name}.as_rate(), 0) / ( default(sum:gcp.loadbalancing.https.request_count{${var.filter_tags}} by {forwarding_rule_name}.as_rate() + ${var.error_rate_4xx_artificial_request}, 1)) * 100 > ${var.error_rate_4xx_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.error_rate_4xx_threshold_warning}" @@ -51,7 +51,7 @@ resource "datadog_monitor" "error_rate_5xx" { default(sum:gcp.loadbalancing.https.request_count{${var.filter_tags},response_code_class:500} by {forwarding_rule_name}.as_rate(), 0) / ( default(sum:gcp.loadbalancing.https.request_count{${var.filter_tags}} by {forwarding_rule_name}.as_rate() + ${var.error_rate_5xx_artificial_request}, 1)) * 100 > ${var.error_rate_5xx_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.error_rate_5xx_threshold_warning}" @@ -88,7 +88,7 @@ resource "datadog_monitor" "backend_latency_service" { ${var.backend_latency_service_time_aggregator}(${var.backend_latency_service_timeframe}): default(min:gcp.loadbalancing.https.backend_latencies.avg{${var.filter_tags},backend_target_type:backend_service} by {backend_target_name,forwarding_rule_name}, 0) > ${var.backend_latency_service_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.backend_latency_service_threshold_warning}" @@ -125,7 +125,7 @@ resource "datadog_monitor" "backend_latency_bucket" { ${var.backend_latency_bucket_time_aggregator}(${var.backend_latency_bucket_timeframe}): default(min:gcp.loadbalancing.https.backend_latencies.avg{${var.filter_tags},backend_target_type:backend_bucket} by {backend_target_name,forwarding_rule_name}, 0) > ${var.backend_latency_bucket_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.backend_latency_bucket_threshold_warning}" @@ -162,7 +162,7 @@ resource "datadog_monitor" "request_count" { pct_change(${var.request_count_time_aggregator}(${var.request_count_timeframe}),${var.request_count_timeshift}): default(sum:gcp.loadbalancing.https.request_count{${var.filter_tags}} by {forwarding_rule_name}.as_count(), 0) > ${var.request_count_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.request_count_threshold_warning}" diff --git a/cloud/gcp/pubsub/monitors-pubsub.tf b/cloud/gcp/pubsub/monitors-pubsub.tf index 2f71227..faa4b01 100644 --- a/cloud/gcp/pubsub/monitors-pubsub.tf +++ b/cloud/gcp/pubsub/monitors-pubsub.tf @@ -12,7 +12,7 @@ resource "datadog_monitor" "sending_operations_count" { ${var.sending_operations_count_time_aggregator}(${var.sending_operations_count_timeframe}): default(avg:gcp.pubsub.topic.send_message_operation_count{${var.filter_tags}} by {topic_id}.as_count(), 0) <= ${var.sending_operations_count_threshold_critical} -EOQ + EOQ thresholds { critical = "${var.sending_operations_count_threshold_critical}" @@ -48,7 +48,7 @@ resource "datadog_monitor" "unavailable_sending_operations_count" { ${var.unavailable_sending_operations_count_time_aggregator}(${var.unavailable_sending_operations_count_timeframe}): default(avg:gcp.pubsub.topic.send_message_operation_count{${var.filter_tags},response_code:unavailable} by {topic_id}.as_count(), 0) >= ${var.unavailable_sending_operations_count_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.unavailable_sending_operations_count_threshold_warning}" diff --git a/database/elasticsearch/monitors-elasticsearch.tf b/database/elasticsearch/monitors-elasticsearch.tf index 109dda6..3607114 100644 --- a/database/elasticsearch/monitors-elasticsearch.tf +++ b/database/elasticsearch/monitors-elasticsearch.tf @@ -8,7 +8,7 @@ resource "datadog_monitor" "not_responding" { query = < ${var.cluster_initializing_shards_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.cluster_initializing_shards_threshold_warning}" @@ -140,7 +140,7 @@ resource "datadog_monitor" "cluster_relocating_shards" { ${var.cluster_relocating_shards_time_aggregator}(${var.cluster_relocating_shards_timeframe}): avg:elasticsearch.relocating_shards${module.filter-tags.query_alert} by {cluster_name} > ${var.cluster_relocating_shards_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.cluster_relocating_shards_threshold_warning}" @@ -182,7 +182,7 @@ resource "datadog_monitor" "cluster_unassigned_shards" { ${var.cluster_unassigned_shards_time_aggregator}(${var.cluster_unassigned_shards_timeframe}): avg:elasticsearch.unassigned_shards${module.filter-tags.query_alert} by {cluster_name} > ${var.cluster_unassigned_shards_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.cluster_unassigned_shards_threshold_warning}" @@ -227,7 +227,7 @@ resource "datadog_monitor" "node_free_space" { min:elasticsearch.fs.total.total_in_bytes${module.filter-tags.query_alert} by {node_name} ) * 100 < ${var.node_free_space_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.node_free_space_threshold_warning}" @@ -269,7 +269,7 @@ resource "datadog_monitor" "jvm_heap_memory_usage" { ${var.jvm_heap_memory_usage_time_aggregator}(${var.jvm_heap_memory_usage_timeframe}): avg:jvm.mem.heap_in_use${module.filter-tags.query_alert} by {node_name} > ${var.jvm_heap_memory_usage_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.jvm_heap_memory_usage_threshold_warning}" @@ -311,7 +311,7 @@ resource "datadog_monitor" "jvm_memory_young_usage" { ${var.jvm_memory_young_usage_time_aggregator}(${var.jvm_memory_young_usage_timeframe}): avg:jvm.mem.pools.young.used${module.filter-tags.query_alert} by {node_name} / avg:jvm.mem.pools.young.max${module.filter-tags.query_alert} by {node_name} * 100 > ${var.jvm_memory_young_usage_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.jvm_memory_young_usage_threshold_warning}" @@ -353,7 +353,7 @@ resource "datadog_monitor" "jvm_memory_old_usage" { ${var.jvm_memory_old_usage_time_aggregator}(${var.jvm_memory_old_usage_timeframe}): avg:jvm.mem.pools.old.used${module.filter-tags.query_alert} by {node_name} / avg:jvm.mem.pools.old.max${module.filter-tags.query_alert} by {node_name} * 100 > ${var.jvm_memory_old_usage_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.jvm_memory_old_usage_threshold_warning}" @@ -395,7 +395,7 @@ resource "datadog_monitor" "jvm_gc_old_collection_latency" { ${var.jvm_gc_old_collection_latency_time_aggregator}(${var.jvm_gc_old_collection_latency_timeframe}): avg:jvm.gc.collectors.old.collection_time${module.filter-tags.query_alert} by {node_name} / avg:jvm.gc.collectors.old.count${module.filter-tags.query_alert} by {node_name} * 1000 > ${var.jvm_gc_old_collection_latency_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.jvm_gc_old_collection_latency_threshold_warning}" @@ -437,7 +437,7 @@ resource "datadog_monitor" "jvm_gc_young_collection_latency" { ${var.jvm_gc_young_collection_latency_time_aggregator}(${var.jvm_gc_young_collection_latency_timeframe}): avg:jvm.gc.collectors.young.collection_time${module.filter-tags.query_alert} by {node_name} / avg:jvm.gc.collectors.young.count${module.filter-tags.query_alert} by {node_name} * 1000 > ${var.jvm_gc_young_collection_latency_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.jvm_gc_young_collection_latency_threshold_warning}" @@ -480,7 +480,7 @@ resource "datadog_monitor" "indexing_latency" { ${var.indexing_latency_time_aggregator}(${var.indexing_latency_timeframe}): avg:elasticsearch.indexing.index.time${module.filter-tags.query_alert} by {node_name}/ avg:elasticsearch.indexing.index.total${module.filter-tags.query_alert} by {node_name} * 1000 > ${var.indexing_latency_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.indexing_latency_threshold_warning}" @@ -523,7 +523,7 @@ resource "datadog_monitor" "flush_latency" { ${var.flush_latency_time_aggregator}(${var.flush_latency_timeframe}): avg:elasticsearch.flush.total.time${module.filter-tags.query_alert} by {node_name} / avg:elasticsearch.flush.total${module.filter-tags.query_alert} by {node_name} * 1000 > ${var.flush_latency_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.flush_latency_threshold_warning}" @@ -573,7 +573,7 @@ resource "datadog_monitor" "http_connections_anomaly" { seasonality='${var.http_connections_anomaly_seasonality}' ) >= ${var.http_connections_anomaly_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.http_connections_anomaly_threshold_warning}" @@ -616,7 +616,7 @@ resource "datadog_monitor" "search_query_latency" { ${var.search_query_latency_time_aggregator}(${var.search_query_latency_timeframe}): avg:elasticsearch.search.query.time${module.filter-tags.query_alert} by {node_name} / avg:elasticsearch.search.query.total${module.filter-tags.query_alert} by {node_name} * 1000 > ${var.search_query_latency_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.search_query_latency_threshold_warning}" @@ -659,7 +659,7 @@ resource "datadog_monitor" "fetch_latency" { ${var.fetch_latency_time_aggregator}(${var.fetch_latency_timeframe}): avg:elasticsearch.search.fetch.time${module.filter-tags.query_alert} by {node_name} / avg:elasticsearch.search.fetch.total${module.filter-tags.query_alert} by {node_name} * 1000 > ${var.fetch_latency_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.fetch_latency_threshold_warning}" @@ -701,7 +701,7 @@ resource "datadog_monitor" "search_query_change" { pct_change(${var.search_query_change_time_aggregator}(${var.search_query_change_timeframe}),${var.search_query_change_timeshift}): avg:elasticsearch.search.query.current${module.filter-tags.query_alert} by {cluster_name} >= ${var.search_query_change_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.search_query_change_threshold_warning}" @@ -743,7 +743,7 @@ resource "datadog_monitor" "fetch_change" { pct_change(${var.fetch_change_time_aggregator}(${var.fetch_change_timeframe}),${var.fetch_change_timeshift}): avg:elasticsearch.search.fetch.current${module.filter-tags.query_alert} by {cluster_name} >= ${var.fetch_change_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.fetch_change_threshold_warning}" @@ -786,7 +786,7 @@ resource "datadog_monitor" "field_data_evictions_change" { change(${var.field_data_evictions_change_time_aggregator}(${var.field_data_evictions_change_timeframe}),${var.field_data_evictions_change_timeshift}): avg:elasticsearch.fielddata.evictions${module.filter-tags.query_alert} by {node_name} > ${var.field_data_evictions_change_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.field_data_evictions_change_threshold_warning}" @@ -829,7 +829,7 @@ resource "datadog_monitor" "query_cache_evictions_change" { change(${var.query_cache_evictions_change_time_aggregator}(${var.query_cache_evictions_change_timeframe}),${var.query_cache_evictions_change_timeshift}): avg:elasticsearch.indices.query_cache.evictions${module.filter-tags.query_alert} by {node_name} > ${var.query_cache_evictions_change_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.query_cache_evictions_change_threshold_warning}" @@ -872,7 +872,7 @@ resource "datadog_monitor" "request_cache_evictions_change" { change(${var.request_cache_evictions_change_time_aggregator}(${var.request_cache_evictions_change_timeframe}),${var.request_cache_evictions_change_timeshift}): avg:elasticsearch.indices.request_cache.evictions${module.filter-tags.query_alert} by {node_name} > ${var.request_cache_evictions_change_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.request_cache_evictions_change_threshold_warning}" @@ -914,7 +914,7 @@ resource "datadog_monitor" "task_time_in_queue_change" { change(${var.task_time_in_queue_change_time_aggregator}(${var.task_time_in_queue_change_timeframe}),${var.task_time_in_queue_change_timeshift}): avg:elasticsearch.pending_tasks_time_in_queue${module.filter-tags.query_alert} by {cluster_name} > ${var.task_time_in_queue_change_threshold_critical} -EOQ + EOQ thresholds { warning = "${var.task_time_in_queue_change_threshold_warning}" diff --git a/database/redis/monitors-redis.tf b/database/redis/monitors-redis.tf index 22079bf..6fe3856 100644 --- a/database/redis/monitors-redis.tf +++ b/database/redis/monitors-redis.tf @@ -42,7 +42,7 @@ resource "datadog_monitor" "evicted_keys" { change(${var.evictedkeys_change_time_aggregator}(${var.evictedkeys_change_timeframe}),${var.evictedkeys_change_timeframe}): ( avg:redis.keys.evicted${module.filter-tags.query_alert} by {redis_host,redis_port} ) > ${var.evictedkeys_change_threshold_critical} -EOQ + EOQ type = "metric alert" @@ -75,7 +75,7 @@ resource "datadog_monitor" "expirations" { ${var.expirations_rate_time_aggregator}(${var.expirations_rate_timeframe}): ( avg:redis.expires.percent${module.filter-tags.query_alert} by {redis_host,redis_port} ) > ${var.expirations_rate_threshold_critical} -EOQ + EOQ type = "metric alert" @@ -109,7 +109,7 @@ resource "datadog_monitor" "blocked_clients" { sum:redis.clients.blocked${module.filter-tags.query_alert} by {redis_host,redis_port} / sum:redis.net.clients${module.filter-tags.query_alert} by {redis_host,redis_port} ) * 100 > ${var.blocked_clients_threshold_critical} -EOQ + EOQ type = "metric alert" @@ -142,7 +142,7 @@ resource "datadog_monitor" "keyspace_full" { ${var.keyspace_time_aggregator}(${var.keyspace_timeframe}): ( abs(diff(avg:redis.keys${module.filter-tags.query_alert} by {redis_host,redis_port})) ) == ${var.keyspace_threshold_critical} -EOQ + EOQ type = "metric alert" @@ -176,7 +176,7 @@ resource "datadog_monitor" "memory_used" { avg:redis.mem.used${module.filter-tags.query_alert} by {redis_host,redis_port} / max:redis.mem.maxmemory${module.filter-tags.query_alert} by {redis_host,redis_port} ) * 100 > ${var.mem_used_threshold_critical} -EOQ + EOQ type = "metric alert" @@ -209,7 +209,7 @@ resource "datadog_monitor" "memory_frag" { ${var.mem_frag_time_aggregator}(${var.mem_frag_timeframe}): avg:redis.mem.fragmentation_ratio${module.filter-tags.query_alert} by {redis_host,redis_port} * 100 > ${var.mem_frag_threshold_critical} -EOQ + EOQ type = "metric alert" @@ -242,7 +242,7 @@ resource "datadog_monitor" "rejected_connections" { change(${var.rejected_con_time_aggregator}(${var.rejected_con_timeframe}),${var.rejected_con_timeframe}): ( avg:redis.net.rejected${module.filter-tags.query_alert} by {redis_host,redis_port} ) > ${var.rejected_con_threshold_critical} -EOQ + EOQ type = "metric alert" @@ -275,7 +275,7 @@ resource "datadog_monitor" "latency" { change(${var.latency_time_aggregator}(${var.latency_timeframe}),${var.latency_timeframe}): ( avg:redis.info.latency_ms${module.filter-tags.query_alert} by {redis_host,redis_port} ) > ${var.latency_threshold_critical} -EOQ + EOQ type = "metric alert" @@ -310,7 +310,7 @@ resource "datadog_monitor" "hitrate" { / (sum:redis.stats.keyspace_hits${module.filter-tags.query_alert} by {redis_host,redis_port} + sum:redis.stats.keyspace_misses${module.filter-tags.query_alert} by {redis_host,redis_port}) ) * 100 < ${var.hitrate_threshold_critical} -EOQ + EOQ type = "metric alert"