MON-486 run auto update with new fmt fix in 0.12.6
This commit is contained in:
parent
c16e807c1a
commit
a8af464794
@ -66,7 +66,7 @@ resource "datadog_monitor" "memory_pressure" {
|
||||
message = coalesce(var.memory_pressure_message, var.message)
|
||||
type = "service check"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
"kubernetes_state.node.memory_pressure"${module.filter-tags.service_check}.by("kubernetescluster","node").last(6).count_by_status()
|
||||
EOQ
|
||||
|
||||
@ -97,7 +97,7 @@ resource "datadog_monitor" "ready" {
|
||||
message = coalesce(var.ready_message, var.message)
|
||||
type = "service check"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
"kubernetes_state.node.ready"${module.filter-tags.service_check}.by("kubernetescluster","node").last(6).count_by_status()
|
||||
EOQ
|
||||
|
||||
@ -190,7 +190,7 @@ resource "datadog_monitor" "unregister_net_device" {
|
||||
message = coalesce(var.unregister_net_device_message, var.message)
|
||||
type = "event alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
events('sources:kubernetes priority:all ${module.filter-tags.event_alert} \"UnregisterNetDevice\"').rollup('count').last('${var.unregister_net_device_timeframe}') > ${var.unregister_net_device_threshold_critical}
|
||||
EOQ
|
||||
|
||||
@ -215,7 +215,7 @@ resource "datadog_monitor" "node_unschedulable" {
|
||||
message = coalesce(var.node_unschedulable_message, var.message)
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.node_unschedulable_time_aggregator}(${var.node_unschedulable_timeframe}):
|
||||
sum:kubernetes_state.node.status${module.filter-tags-unschedulable.query_alert} by {kubernetescluster,node}
|
||||
> 0
|
||||
|
||||
@ -66,7 +66,7 @@ resource "datadog_monitor" "replica_available" {
|
||||
message = coalesce(var.replica_available_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.replica_available_time_aggregator}(${var.replica_available_timeframe}):
|
||||
max:kubernetes_state.deployment.replicas_desired${module.filter-tags.query_alert} by {namespace, deployment} -
|
||||
max:kubernetes_state.deployment.replicas_available${module.filter-tags.query_alert} by {namespace, deployment}
|
||||
@ -100,7 +100,7 @@ resource "datadog_monitor" "replica_ready" {
|
||||
message = coalesce(var.replica_ready_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.replica_available_time_aggregator}(${var.replica_available_timeframe}):
|
||||
max:kubernetes_state.replicaset.replicas_desired${module.filter-tags.query_alert} by {namespace, replicaset} -
|
||||
max:kubernetes_state.replicaset.replicas_ready${module.filter-tags.query_alert} by {namespace, replicaset}
|
||||
|
||||
@ -70,7 +70,7 @@ resource "datadog_monitor" "ALB_httpcode_5xx" {
|
||||
message = coalesce(var.httpcode_alb_5xx_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.httpcode_alb_5xx_time_aggregator}(${var.httpcode_alb_5xx_timeframe}):
|
||||
default(avg:aws.applicationelb.httpcode_elb_5xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_rate(), 0) / (
|
||||
default(avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
@ -103,7 +103,7 @@ resource "datadog_monitor" "ALB_httpcode_4xx" {
|
||||
message = coalesce(var.httpcode_alb_4xx_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.httpcode_alb_4xx_time_aggregator}(${var.httpcode_alb_4xx_timeframe}):
|
||||
default(avg:aws.applicationelb.httpcode_elb_4xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_rate(), 0) / (
|
||||
default(avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
|
||||
@ -72,7 +72,7 @@ resource "datadog_monitor" "API_http_4xx_errors_count" {
|
||||
message = coalesce(var.http_4xx_requests_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.http_4xx_requests_time_aggregator}(${var.http_4xx_requests_timeframe}):
|
||||
default(avg:aws.apigateway.4xxerror{${var.filter_tags}} by {region,apiname,stage}.as_rate(), 0) / (
|
||||
default(avg:aws.apigateway.count{${var.filter_tags}} by {region,apiname,stage}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
|
||||
@ -67,7 +67,7 @@ resource "datadog_monitor" "elasticache_no_connection" {
|
||||
message = coalesce(var.no_connection_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.no_connection_time_aggregator}(${var.no_connection_timeframe}): (
|
||||
avg:aws.elasticache.curr_connections${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
) <= 0
|
||||
@ -96,7 +96,7 @@ resource "datadog_monitor" "elasticache_swap" {
|
||||
message = coalesce(var.swap_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.swap_time_aggregator}(${var.swap_timeframe}): (
|
||||
avg:aws.elasticache.swap_usage${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
) > ${var.swap_threshold_critical}
|
||||
|
||||
@ -69,7 +69,7 @@ resource "datadog_monitor" "redis_replication_lag" {
|
||||
message = coalesce(var.replication_lag_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.replication_lag_time_aggregator}(${var.replication_lag_timeframe}): (
|
||||
avg:aws.elasticache.replication_lag${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
) > ${var.replication_lag_threshold_critical}
|
||||
@ -103,7 +103,7 @@ resource "datadog_monitor" "redis_commands" {
|
||||
message = coalesce(var.commands_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
sum(${var.commands_timeframe}): (
|
||||
avg:aws.elasticache.get_type_cmds${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_count() +
|
||||
avg:aws.elasticache.set_type_cmds${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_count()
|
||||
|
||||
@ -81,7 +81,7 @@ resource "datadog_monitor" "es_cpu_90_15min" {
|
||||
message = coalesce(var.cpu_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.cpu_time_aggregator}(${var.cpu_timeframe}): (
|
||||
avg:aws.es.cpuutilization${module.filter-tags.query_alert} by {region,name}
|
||||
) > ${var.cpu_threshold_critical}
|
||||
|
||||
@ -75,7 +75,7 @@ resource "datadog_monitor" "ELB_too_much_5xx" {
|
||||
message = coalesce(var.elb_5xx_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
sum(${var.elb_5xx_timeframe}):
|
||||
default(avg:aws.elb.httpcode_elb_5xx${module.filter-tags.query_alert} by {region,loadbalancername}.as_rate(), 0) / (
|
||||
default(avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
@ -110,7 +110,7 @@ resource "datadog_monitor" "ELB_too_much_4xx_backend" {
|
||||
message = coalesce(var.elb_backend_4xx_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
sum(${var.elb_backend_4xx_timeframe}):
|
||||
default(avg:aws.elb.httpcode_backend_4xx${module.filter-tags.query_alert} by {region,loadbalancername}.as_rate(), 0) / (
|
||||
default(avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername}.as_rate() + ${var.artificial_requests_count}, 1))
|
||||
|
||||
@ -74,7 +74,7 @@ resource "datadog_monitor" "rds_replica_lag" {
|
||||
message = coalesce(var.replicalag_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
avg(${var.replicalag_timeframe}): (
|
||||
avg:aws.rds.replica_lag${module.filter-tags.query_alert} by {region,name}
|
||||
) > ${var.replicalag_threshold_critical}
|
||||
|
||||
@ -70,7 +70,7 @@ resource "datadog_monitor" "apimgt_other_requests" {
|
||||
message = coalesce(var.other_requests_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.other_requests_time_aggregator}(${var.other_requests_timeframe}): (
|
||||
default(avg:azure.apimanagement_service.other_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
@ -105,7 +105,7 @@ resource "datadog_monitor" "apimgt_unauthorized_requests" {
|
||||
message = coalesce(var.unauthorized_requests_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.unauthorized_requests_time_aggregator}(${var.unauthorized_requests_timeframe}): (
|
||||
default(avg:azure.apimanagement_service.unauthorized_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
|
||||
@ -71,7 +71,7 @@ resource "datadog_monitor" "appservices_http_5xx_errors_count" {
|
||||
message = coalesce(var.http_5xx_requests_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.http_5xx_requests_time_aggregator}(${var.http_5xx_requests_timeframe}): (
|
||||
default(avg:azure.app_services.http5xx${module.filter-tags.query_alert} by {resource_group,region,name,instance}.as_rate(), 0) /
|
||||
default(avg:azure.app_services.requests${module.filter-tags.query_alert} by {resource_group,region,name,instance}.as_rate(), 1)
|
||||
@ -105,7 +105,7 @@ resource "datadog_monitor" "appservices_http_4xx_errors_count" {
|
||||
message = coalesce(var.http_4xx_requests_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.http_4xx_requests_time_aggregator}(${var.http_4xx_requests_timeframe}): (
|
||||
default(avg:azure.app_services.http4xx${module.filter-tags.query_alert} by {resource_group,region,name,instance}.as_rate(), 0) /
|
||||
default(avg:azure.app_services.requests${module.filter-tags.query_alert} by {resource_group,region,name,instance}.as_rate(), 1)
|
||||
|
||||
@ -82,7 +82,7 @@ resource "datadog_monitor" "cosmos_db_5xx_requests" {
|
||||
message = coalesce(var.cosmos_db_5xx_requests_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.cosmos_db_5xx_request_time_aggregator}(${var.cosmos_db_5xx_request_timeframe}): default( (
|
||||
default(sum:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "500")} by {resource_group,region,name,databasename,collectionname}.as_rate(), 0) +
|
||||
default(sum:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "503")} by {resource_group,region,name,databasename,collectionname}.as_rate(), 0)) /
|
||||
@ -119,7 +119,7 @@ resource "datadog_monitor" "cosmos_db_scaling" {
|
||||
type = "query alert"
|
||||
|
||||
# List of available status codes : https://docs.microsoft.com/en-us/rest/api/cosmos-db/http-status-codes-for-cosmosdb
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.cosmos_db_scaling_time_aggregator}(${var.cosmos_db_scaling_timeframe}): default(
|
||||
default(sum:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "429")} by {resource_group,region,name,databasename,collectionname}.as_rate(), 0) /
|
||||
default(sum:azure.cosmosdb.total_requests${module.filter-tags.query_alert} by {resource_group,region,name,databasename,collectionname}.as_rate(), 0)
|
||||
|
||||
@ -71,7 +71,7 @@ resource "datadog_monitor" "eventgrid_unmatched_events" {
|
||||
message = coalesce(var.unmatched_events_rate_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.unmatched_events_rate_time_aggregator}(${var.unmatched_events_rate_timeframe}): (default(
|
||||
avg:azure.eventgrid_topics.unmatched_event_count${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate() /
|
||||
(avg:azure.eventgrid_topics.publish_success_count${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate() +
|
||||
|
||||
@ -68,7 +68,7 @@ resource "datadog_monitor" "eventhub_errors" {
|
||||
message = coalesce(var.errors_rate_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.errors_rate_time_aggregator}(${var.errors_rate_timeframe}): ( (
|
||||
default(avg:azure.eventhub_namespaces.internal_server_errors${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
|
||||
default(avg:azure.eventhub_namespaces.server_busy_errors${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
|
||||
|
||||
@ -69,7 +69,7 @@ resource "datadog_monitor" "function_high_threads_count" {
|
||||
message = coalesce(var.high_threads_count_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.high_threads_count_time_aggregator}(${var.high_threads_count_timeframe}):
|
||||
default(azure.functions.thread_count${module.filter-tags.query_alert} by {resource_group,region,name,instance}.as_rate(), 0)
|
||||
> ${var.high_threads_count_threshold_critical}
|
||||
|
||||
@ -78,7 +78,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" {
|
||||
message = coalesce(var.failed_queryjobs_rate_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.failed_queryjobs_rate_time_aggregator}(${var.failed_queryjobs_rate_timeframe}):
|
||||
default(
|
||||
default(avg:azure.devices_iothubs.jobs.query_jobs.failure${module.filter-tags.query_alert} by {resource_group,name}.as_rate(), 0) / (
|
||||
@ -115,7 +115,7 @@ resource "datadog_monitor" "status" {
|
||||
message = coalesce(var.status_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.status_time_aggregator}(${var.status_timeframe}): (
|
||||
avg:azure.devices_iothubs.status${module.filter-tags.query_alert} by {resource_group,region,name}
|
||||
) < 1
|
||||
@ -210,7 +210,7 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
|
||||
message = coalesce(var.failed_c2d_twin_read_rate_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.failed_c2d_twin_read_rate_time_aggregator}(${var.failed_c2d_twin_read_rate_timeframe}):
|
||||
default(
|
||||
default(avg:azure.devices_iothubs.c2d.twin.read.failure${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) / (
|
||||
@ -247,7 +247,7 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
|
||||
message = coalesce(var.failed_c2d_twin_update_rate_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.failed_c2d_twin_update_rate_time_aggregator}(${var.failed_c2d_twin_update_rate_timeframe}):
|
||||
default(
|
||||
default(avg:azure.devices_iothubs.c2d.twin.update.failure${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) / (
|
||||
@ -358,7 +358,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
|
||||
message = coalesce(var.dropped_d2c_telemetry_egress_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.dropped_d2c_telemetry_egress_time_aggregator}(${var.dropped_d2c_telemetry_egress_timeframe}):
|
||||
default(
|
||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) / (
|
||||
@ -397,7 +397,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
|
||||
message = coalesce(var.orphaned_d2c_telemetry_egress_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.orphaned_d2c_telemetry_egress_time_aggregator}(${var.orphaned_d2c_telemetry_egress_timeframe}):
|
||||
default(
|
||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) / (
|
||||
|
||||
@ -70,7 +70,7 @@ resource "datadog_monitor" "keyvault_api_latency" {
|
||||
message = coalesce(var.status_message, var.message)
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.api_latency_time_aggregator}(${var.api_latency_timeframe}):
|
||||
avg:azure.keyvault_vaults.service_api_latency${module.filter-tags-activity.query_alert} by {name,resource_group,region}
|
||||
> ${var.api_latency_threshold_critical}
|
||||
|
||||
@ -72,7 +72,7 @@ resource "datadog_monitor" "mysql_io_consumption" {
|
||||
message = coalesce(var.io_consumption_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.io_consumption_time_aggregator}(${var.io_consumption_timeframe}): (
|
||||
avg:azure.dbformysql_servers.io_consumption_percent${module.filter-tags.query_alert} by {resource_group,region,name}
|
||||
) > ${var.io_consumption_threshold_critical}
|
||||
@ -106,7 +106,7 @@ resource "datadog_monitor" "mysql_memory_usage" {
|
||||
message = coalesce(var.memory_usage_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.memory_usage_time_aggregator}(${var.memory_usage_timeframe}): (
|
||||
avg:azure.dbformysql_servers.memory_percent${module.filter-tags.query_alert} by {resource_group,region,name}
|
||||
) > ${var.memory_usage_threshold_critical}
|
||||
|
||||
@ -67,7 +67,7 @@ resource "datadog_monitor" "postgresql_free_storage" {
|
||||
message = coalesce(var.free_storage_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.free_storage_time_aggregator}(${var.free_storage_timeframe}): (
|
||||
100 - avg:azure.dbforpostgresql_servers.storage_percent${module.filter-tags.query_alert} by {resource_group,region,name}
|
||||
) < ${var.free_storage_threshold_critical}
|
||||
@ -101,7 +101,7 @@ resource "datadog_monitor" "postgresql_io_consumption" {
|
||||
message = coalesce(var.io_consumption_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.io_consumption_time_aggregator}(${var.io_consumption_timeframe}): (
|
||||
avg:azure.dbforpostgresql_servers.io_consumption_percent${module.filter-tags.query_alert} by {resource_group,region,name}
|
||||
) > ${var.io_consumption_threshold_critical}
|
||||
|
||||
@ -67,7 +67,7 @@ resource "datadog_monitor" "percent_processor_time" {
|
||||
message = coalesce(var.percent_processor_time_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.percent_processor_time_time_aggregator}(${var.percent_processor_time_timeframe}): (
|
||||
avg:azure.cache_redis.percent_processor_time${module.filter-tags.query_alert} by {resource_group,region,name}
|
||||
) > ${var.percent_processor_time_threshold_critical}
|
||||
@ -101,7 +101,7 @@ resource "datadog_monitor" "server_load" {
|
||||
message = coalesce(var.server_load_rate_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.server_load_rate_time_aggregator}(${var.server_load_rate_timeframe}): (
|
||||
avg:azure.cache_redis.server_load${module.filter-tags.query_alert} by {resource_group,region,name}
|
||||
) > ${var.server_load_rate_threshold_critical}
|
||||
|
||||
@ -67,7 +67,7 @@ resource "datadog_monitor" "memory_percentage" {
|
||||
message = coalesce(var.memory_percentage_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.memory_percentage_time_aggregator}(${var.memory_percentage_timeframe}): (
|
||||
avg:azure.web_serverfarms.memory_percentage${module.filter-tags.query_alert} by {resource_group,region,name,instance}
|
||||
) > ${var.memory_percentage_threshold_critical}
|
||||
|
||||
@ -62,7 +62,7 @@ resource "datadog_monitor" "service_bus_user_errors" {
|
||||
message = coalesce(var.user_errors_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.user_errors_time_aggregator}(${var.user_errors_timeframe}): (
|
||||
default(avg:azure.servicebus_namespaces.user_errors.preview${module.filter-tags.query_alert} by {resource_group,region,name,entityname}, 0) /
|
||||
default(avg:azure.servicebus_namespaces.incoming_requests_preview${module.filter-tags.query_alert} by {resource_group,region,name,entityname}, 1)
|
||||
@ -97,7 +97,7 @@ resource "datadog_monitor" "service_bus_server_errors" {
|
||||
message = coalesce(var.server_errors_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.server_errors_time_aggregator}(${var.server_errors_timeframe}): (
|
||||
default(avg:azure.servicebus_namespaces.server_errors.preview${module.filter-tags.query_alert} by {resource_group,region,name,entityname}, 0) /
|
||||
default(avg:azure.servicebus_namespaces.incoming_requests_preview${module.filter-tags.query_alert} by {resource_group,region,name,entityname}, 1)
|
||||
|
||||
@ -67,7 +67,7 @@ resource "datadog_monitor" "sql-database_free_space_low" {
|
||||
message = coalesce(var.diskspace_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.diskspace_time_aggregator}(${var.diskspace_timeframe}): (
|
||||
avg:azure.sql_servers_databases.storage_percent${module.filter-tags.query_alert} by {resource_group,region,server_name,name}
|
||||
) > ${var.diskspace_threshold_critical}
|
||||
@ -101,7 +101,7 @@ resource "datadog_monitor" "sql-database_dtu_consumption_high" {
|
||||
message = coalesce(var.dtu_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.dtu_time_aggregator}(${var.dtu_timeframe}): (
|
||||
avg:azure.sql_servers_databases.dtu_consumption_percent${module.filter-tags.query_alert} by {resource_group,region,server_name,name}
|
||||
) > ${var.dtu_threshold_critical}
|
||||
|
||||
@ -72,7 +72,7 @@ resource "datadog_monitor" "sql_elasticpool_dtu_consumption_high" {
|
||||
message = coalesce(var.dtu_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.dtu_time_aggregator}(${var.dtu_timeframe}): (
|
||||
azure.sql_servers_elasticpools.dtu_consumption_percent${module.filter-tags.query_alert} by {resource_group,region,server_name,name}
|
||||
) > ${var.dtu_threshold_critical}
|
||||
|
||||
@ -939,7 +939,7 @@ resource "datadog_monitor" "blob_client_other_error_requests" {
|
||||
count = var.client_other_error_requests_enabled == "true" ? 1 : 0
|
||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Azure Blob Storage too many client_other errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = coalesce(var.client_other_error_requests_message, var.message)
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.client_other_error_requests_time_aggregator}(${var.client_other_error_requests_timeframe}):
|
||||
default((default(sum:azure.storage_storageaccounts_blobservices.transactions${module.filter-tags-client-other-error.query_alert} by {name}.as_rate(),0) /
|
||||
default(sum:azure.storage_storageaccounts_blobservices.transactions${module.filter-tags.query_alert} by {name}.as_rate(),0)
|
||||
|
||||
@ -67,7 +67,7 @@ resource "datadog_monitor" "failed_function_requests" {
|
||||
message = coalesce(var.failed_function_requests_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.failed_function_requests_time_aggregator}(${var.failed_function_requests_timeframe}): (
|
||||
default(avg:azure.streamanalytics_streamingjobs.aml_callout_failed_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.streamanalytics_streamingjobs.aml_callout_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
@ -102,7 +102,7 @@ resource "datadog_monitor" "conversion_errors" {
|
||||
message = coalesce(var.conversion_errors_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.conversion_errors_time_aggregator}(${var.conversion_errors_timeframe}): (
|
||||
avg:azure.streamanalytics_streamingjobs.conversion_errors${module.filter-tags.query_alert} by {resource_group,region,name}
|
||||
) > ${var.conversion_errors_threshold_critical}
|
||||
|
||||
@ -67,7 +67,7 @@ resource "datadog_monitor" "virtualmachine_credit_cpu_remaining_too_low" {
|
||||
message = coalesce(var.cpu_remaining_rate_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.cpu_remaining_rate_time_aggregator}(${var.cpu_remaining_rate_timeframe}):
|
||||
default(
|
||||
default(avg:azure.vm.cpu_credits_remaining${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 100) / (
|
||||
|
||||
@ -81,7 +81,7 @@ resource "datadog_monitor" "scanned_bytes" {
|
||||
message = coalesce(var.scanned_bytes_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
avg(${var.scanned_bytes_timeframe}):
|
||||
default(avg:gcp.bigquery.query.scanned_bytes{${var.filter_tags}}, 0)
|
||||
> ${var.scanned_bytes_threshold_critical}
|
||||
@ -118,7 +118,7 @@ resource "datadog_monitor" "scanned_bytes_billed" {
|
||||
message = coalesce(var.scanned_bytes_billed_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
avg(${var.scanned_bytes_billed_timeframe}):
|
||||
default(avg:gcp.bigquery.query.scanned_bytes_billed{${var.filter_tags}}, 0)
|
||||
> ${var.scanned_bytes_billed_threshold_critical}
|
||||
@ -229,7 +229,7 @@ resource "datadog_monitor" "table_count" {
|
||||
message = coalesce(var.table_count_message, var.message)
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
avg(${var.table_count_timeframe}):
|
||||
avg:gcp.bigquery.storage.table_count{${var.filter_tags}} by {dataset_id}
|
||||
> ${var.table_count_threshold_critical}
|
||||
@ -266,7 +266,7 @@ resource "datadog_monitor" "uploaded_bytes" {
|
||||
message = coalesce(var.uploaded_bytes_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
avg(${var.uploaded_bytes_timeframe}):
|
||||
default(avg:gcp.bigquery.storage.uploaded_bytes{${var.filter_tags}} by {dataset_id,table}, 0)
|
||||
> ${var.uploaded_bytes_threshold_critical}
|
||||
|
||||
@ -81,7 +81,7 @@ resource "datadog_monitor" "disk_utilization_forecast" {
|
||||
message = coalesce(var.disk_utilization_forecast_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.disk_utilization_forecast_time_aggregator}(${var.disk_utilization_forecast_timeframe}):
|
||||
forecast(
|
||||
avg:gcp.cloudsql.database.disk.utilization{${var.filter_tags}} by {database_id} * 100,
|
||||
@ -89,38 +89,38 @@ query = <<EOQ
|
||||
${var.disk_utilization_forecast_deviations},
|
||||
interval='${var.disk_utilization_forecast_interval}',
|
||||
${var.disk_utilization_forecast_algorithm == "linear" ? format(
|
||||
"history='%s',model='%s'",
|
||||
var.disk_utilization_forecast_linear_history,
|
||||
var.disk_utilization_forecast_linear_model,
|
||||
) : ""}
|
||||
"history='%s',model='%s'",
|
||||
var.disk_utilization_forecast_linear_history,
|
||||
var.disk_utilization_forecast_linear_model,
|
||||
) : ""}
|
||||
${var.disk_utilization_forecast_algorithm == "seasonal" ? format(
|
||||
"seasonality='%s'",
|
||||
var.disk_utilization_forecast_seasonal_seasonality,
|
||||
"seasonality='%s'",
|
||||
var.disk_utilization_forecast_seasonal_seasonality,
|
||||
) : ""}
|
||||
)
|
||||
>= ${var.disk_utilization_forecast_threshold_critical}
|
||||
EOQ
|
||||
|
||||
thresholds = {
|
||||
thresholds = {
|
||||
critical = var.disk_utilization_forecast_threshold_critical
|
||||
critical_recovery = var.disk_utilization_forecast_threshold_critical_recovery
|
||||
}
|
||||
}
|
||||
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_audit = false
|
||||
locked = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
require_full_window = false
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_audit = false
|
||||
locked = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
require_full_window = false
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
|
||||
tags = concat(["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform"], var.disk_utilization_forecast_extra_tags)
|
||||
tags = concat(["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform"], var.disk_utilization_forecast_extra_tags)
|
||||
|
||||
lifecycle {
|
||||
lifecycle {
|
||||
ignore_changes = ["silenced"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#
|
||||
@ -177,38 +177,38 @@ resource "datadog_monitor" "memory_utilization_forecast" {
|
||||
${var.memory_utilization_forecast_deviations},
|
||||
interval='${var.memory_utilization_forecast_interval}',
|
||||
${var.memory_utilization_forecast_algorithm == "linear" ? format(
|
||||
"history='%s',model='%s'",
|
||||
var.memory_utilization_forecast_linear_history,
|
||||
var.memory_utilization_forecast_linear_model,
|
||||
) : ""}
|
||||
"history='%s',model='%s'",
|
||||
var.memory_utilization_forecast_linear_history,
|
||||
var.memory_utilization_forecast_linear_model,
|
||||
) : ""}
|
||||
${var.memory_utilization_forecast_algorithm == "seasonal" ? format(
|
||||
"seasonality='%s'",
|
||||
var.memory_utilization_forecast_seasonal_seasonality,
|
||||
"seasonality='%s'",
|
||||
var.memory_utilization_forecast_seasonal_seasonality,
|
||||
) : ""}
|
||||
)
|
||||
>= ${var.memory_utilization_forecast_threshold_critical}
|
||||
EOQ
|
||||
|
||||
thresholds = {
|
||||
thresholds = {
|
||||
critical = var.memory_utilization_forecast_threshold_critical
|
||||
critical_recovery = var.memory_utilization_forecast_threshold_critical_recovery
|
||||
}
|
||||
}
|
||||
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_audit = false
|
||||
locked = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
require_full_window = false
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_audit = false
|
||||
locked = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
require_full_window = false
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
|
||||
tags = concat(["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform"], var.memory_utilization_forecast_extra_tags)
|
||||
tags = concat(["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform"], var.memory_utilization_forecast_extra_tags)
|
||||
|
||||
lifecycle {
|
||||
lifecycle {
|
||||
ignore_changes = ["silenced"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#
|
||||
|
||||
@ -87,7 +87,7 @@ resource "datadog_monitor" "disk_throttled_ops" {
|
||||
message = coalesce(var.disk_throttled_ops_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.disk_throttled_ops_time_aggregator}(${var.disk_throttled_ops_timeframe}):
|
||||
(
|
||||
sum:gcp.gce.instance.disk.throttled_read_ops_count{${var.filter_tags}} by {instance_name, device_name} +
|
||||
|
||||
@ -83,7 +83,7 @@ resource "datadog_monitor" "backend_latency_service" {
|
||||
message = coalesce(var.backend_latency_service_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.backend_latency_service_time_aggregator}(${var.backend_latency_service_timeframe}):
|
||||
default(min:gcp.loadbalancing.https.backend_latencies.avg{${var.filter_tags},backend_target_type:backend_service} by {backend_target_name,forwarding_rule_name}, 0)
|
||||
> ${var.backend_latency_service_threshold_critical}
|
||||
@ -120,7 +120,7 @@ resource "datadog_monitor" "backend_latency_bucket" {
|
||||
message = coalesce(var.backend_latency_bucket_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.backend_latency_bucket_time_aggregator}(${var.backend_latency_bucket_timeframe}):
|
||||
default(min:gcp.loadbalancing.https.backend_latencies.avg{${var.filter_tags},backend_target_type:backend_bucket} by {backend_target_name,forwarding_rule_name}, 0)
|
||||
> ${var.backend_latency_bucket_threshold_critical}
|
||||
|
||||
@ -78,7 +78,7 @@ resource "datadog_monitor" "cluster_initializing_shards" {
|
||||
message = coalesce(var.cluster_initializing_shards_message, var.message)
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.cluster_initializing_shards_time_aggregator}(${var.cluster_initializing_shards_timeframe}):
|
||||
avg:elasticsearch.initializing_shards${module.filter-tags.query_alert} by {cluster_name}
|
||||
> ${var.cluster_initializing_shards_threshold_critical}
|
||||
@ -113,7 +113,7 @@ resource "datadog_monitor" "cluster_relocating_shards" {
|
||||
message = coalesce(var.cluster_relocating_shards_message, var.message)
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.cluster_relocating_shards_time_aggregator}(${var.cluster_relocating_shards_timeframe}):
|
||||
avg:elasticsearch.relocating_shards${module.filter-tags.query_alert} by {cluster_name}
|
||||
> ${var.cluster_relocating_shards_threshold_critical}
|
||||
@ -222,7 +222,7 @@ resource "datadog_monitor" "jvm_heap_memory_usage" {
|
||||
message = coalesce(var.jvm_heap_memory_usage_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.jvm_heap_memory_usage_time_aggregator}(${var.jvm_heap_memory_usage_timeframe}):
|
||||
avg:jvm.mem.heap_in_use${module.filter-tags.query_alert} by {node_name}
|
||||
> ${var.jvm_heap_memory_usage_threshold_critical}
|
||||
@ -257,7 +257,7 @@ resource "datadog_monitor" "jvm_memory_young_usage" {
|
||||
message = coalesce(var.jvm_memory_young_usage_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.jvm_memory_young_usage_time_aggregator}(${var.jvm_memory_young_usage_timeframe}):
|
||||
avg:jvm.mem.pools.young.used${module.filter-tags.query_alert} by {node_name} / avg:jvm.mem.pools.young.max${module.filter-tags.query_alert} by {node_name} * 100
|
||||
> ${var.jvm_memory_young_usage_threshold_critical}
|
||||
@ -362,7 +362,7 @@ resource "datadog_monitor" "jvm_gc_young_collection_latency" {
|
||||
message = coalesce(var.jvm_gc_young_collection_latency_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.jvm_gc_young_collection_latency_time_aggregator}(${var.jvm_gc_young_collection_latency_timeframe}):
|
||||
avg:jvm.gc.collectors.young.collection_time${module.filter-tags.query_alert} by {node_name} / avg:jvm.gc.collectors.young.count${module.filter-tags.query_alert} by {node_name} * 1000
|
||||
> ${var.jvm_gc_young_collection_latency_threshold_critical}
|
||||
@ -398,7 +398,7 @@ resource "datadog_monitor" "indexing_latency" {
|
||||
type = "query alert"
|
||||
|
||||
// TODO add tags to filter by node type and do not apply this monitor on non-data nodes
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.indexing_latency_time_aggregator}(${var.indexing_latency_timeframe}):
|
||||
avg:elasticsearch.indexing.index.time${module.filter-tags.query_alert} by {node_name}/ avg:elasticsearch.indexing.index.total${module.filter-tags.query_alert} by {node_name} * 1000
|
||||
> ${var.indexing_latency_threshold_critical}
|
||||
@ -518,7 +518,7 @@ resource "datadog_monitor" "search_query_latency" {
|
||||
type = "query alert"
|
||||
|
||||
// TODO add tags to filter by node type and do not apply this monitor on non-data nodes
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.search_query_latency_time_aggregator}(${var.search_query_latency_timeframe}):
|
||||
avg:elasticsearch.search.query.time${module.filter-tags.query_alert} by {node_name} / avg:elasticsearch.search.query.total${module.filter-tags.query_alert} by {node_name} * 1000
|
||||
> ${var.search_query_latency_threshold_critical}
|
||||
@ -554,7 +554,7 @@ resource "datadog_monitor" "fetch_latency" {
|
||||
type = "query alert"
|
||||
|
||||
// TODO add tags to filter by node type and do not apply this monitor on non-data nodes
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.fetch_latency_time_aggregator}(${var.fetch_latency_timeframe}):
|
||||
avg:elasticsearch.search.fetch.time${module.filter-tags.query_alert} by {node_name} / avg:elasticsearch.search.fetch.total${module.filter-tags.query_alert} by {node_name} * 1000
|
||||
> ${var.fetch_latency_threshold_critical}
|
||||
@ -660,7 +660,7 @@ resource "datadog_monitor" "field_data_evictions_change" {
|
||||
type = "query alert"
|
||||
|
||||
// TODO add tags to filter by node type and do not apply this monitor on non-data nodes
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
change(${var.field_data_evictions_change_time_aggregator}(${var.field_data_evictions_change_timeframe}),${var.field_data_evictions_change_timeshift}):
|
||||
avg:elasticsearch.fielddata.evictions${module.filter-tags.query_alert} by {node_name}
|
||||
> ${var.field_data_evictions_change_threshold_critical}
|
||||
@ -696,7 +696,7 @@ resource "datadog_monitor" "query_cache_evictions_change" {
|
||||
type = "query alert"
|
||||
|
||||
// TODO add tags to filter by node type and do not apply this monitor on non-data nodes
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
change(${var.query_cache_evictions_change_time_aggregator}(${var.query_cache_evictions_change_timeframe}),${var.query_cache_evictions_change_timeshift}):
|
||||
avg:elasticsearch.indices.query_cache.evictions${module.filter-tags.query_alert} by {node_name}
|
||||
> ${var.query_cache_evictions_change_threshold_critical}
|
||||
|
||||
@ -64,7 +64,7 @@ resource "datadog_monitor" "mongodb_server_count" {
|
||||
message = coalesce(var.mongodb_server_count_message, var.message)
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.mongodb_server_count_aggregator}(${var.mongodb_server_count_timeframe}):
|
||||
sum:mongodb.replset.health${module.filter-tags.query_alert} by {replset_name}
|
||||
> 99
|
||||
@ -97,7 +97,7 @@ resource "datadog_monitor" "mongodb_replication" {
|
||||
message = coalesce(var.mongodb_replication_message, var.message)
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.mongodb_replication_aggregator}(${var.mongodb_replication_timeframe}):
|
||||
avg:mongodb.replset.replicationlag${module.filter-tags-secondary.query_alert} by {server} > ${var.mongodb_lag_critical}
|
||||
EOQ
|
||||
|
||||
@ -69,7 +69,7 @@ resource "datadog_monitor" "mysql_aborted" {
|
||||
message = coalesce(var.mysql_aborted_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.mysql_aborted_time_aggregator}(${var.mysql_aborted_timeframe}): (
|
||||
avg:mysql.net.aborted_connects${module.filter-tags.query_alert} by {server} /
|
||||
avg:mysql.performance.threads_connected${module.filter-tags.query_alert} by {server}
|
||||
@ -102,7 +102,7 @@ resource "datadog_monitor" "mysql_slow" {
|
||||
message = coalesce(var.mysql_slow_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.mysql_slow_time_aggregator}(${var.mysql_slow_timeframe}): (
|
||||
avg:mysql.performance.slow_queries${module.filter-tags.query_alert} by {server} /
|
||||
avg:mysql.performance.queries${module.filter-tags.query_alert} by {server}
|
||||
@ -202,7 +202,7 @@ resource "datadog_monitor" "mysql_threads_anomaly" {
|
||||
message = coalesce(var.mysql_threads_message, var.message)
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.mysql_threads_time_aggregator}(${var.mysql_threads_timeframe}):
|
||||
anomalies(
|
||||
avg:mysql.performance.threads_running${module.filter-tags.query_alert} by {server},
|
||||
@ -248,7 +248,7 @@ resource "datadog_monitor" "mysql_questions_anomaly" {
|
||||
message = coalesce(var.mysql_questions_message, var.message)
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.mysql_questions_time_aggregator}(${var.mysql_questions_timeframe}):
|
||||
anomalies(
|
||||
avg:mysql.performance.questions${module.filter-tags.query_alert} by {server},
|
||||
|
||||
@ -68,7 +68,7 @@ resource "datadog_monitor" "postgresql_too_many_locks" {
|
||||
message = coalesce(var.postgresql_lock_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.postgresql_lock_time_aggregator}(${var.postgresql_lock_timeframe}):
|
||||
default(avg:postgresql.locks${module.filter-tags.query_alert} by {server}, 0)
|
||||
> ${var.postgresql_lock_threshold_critical}
|
||||
|
||||
@ -73,7 +73,7 @@ resource "datadog_monitor" "expirations" {
|
||||
message = coalesce(var.expirations_rate_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.expirations_rate_time_aggregator}(${var.expirations_rate_timeframe}): (
|
||||
avg:redis.expires.percent${module.filter-tags.query_alert} by {redis_host,redis_port}
|
||||
) > ${var.expirations_rate_threshold_critical}
|
||||
@ -107,7 +107,7 @@ resource "datadog_monitor" "blocked_clients" {
|
||||
message = coalesce(var.blocked_clients_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.blocked_clients_time_aggregator}(${var.blocked_clients_timeframe}): (
|
||||
sum:redis.clients.blocked${module.filter-tags.query_alert} by {redis_host,redis_port}
|
||||
/ sum:redis.net.clients${module.filter-tags.query_alert} by {redis_host,redis_port}
|
||||
@ -211,7 +211,7 @@ resource "datadog_monitor" "memory_frag" {
|
||||
message = coalesce(var.mem_frag_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.mem_frag_time_aggregator}(${var.mem_frag_timeframe}):
|
||||
avg:redis.mem.fragmentation_ratio${module.filter-tags.query_alert} by {redis_host,redis_port}
|
||||
* 100 > ${var.mem_frag_threshold_critical}
|
||||
@ -245,7 +245,7 @@ resource "datadog_monitor" "rejected_connections" {
|
||||
message = coalesce(var.rejected_con_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
change(${var.rejected_con_time_aggregator}(${var.rejected_con_timeframe}),${var.rejected_con_timeframe}): (
|
||||
avg:redis.net.rejected${module.filter-tags.query_alert} by {redis_host,redis_port}
|
||||
) > ${var.rejected_con_threshold_critical}
|
||||
|
||||
@ -70,7 +70,7 @@ resource "datadog_monitor" "disk_space" {
|
||||
message = coalesce(var.disk_space_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.disk_space_time_aggregator}(${var.disk_space_timeframe}):
|
||||
avg:system.disk.in_use${module.filter-tags-disk.query_alert} by {host,device}
|
||||
* 100 > ${var.disk_space_threshold_critical}
|
||||
@ -103,45 +103,45 @@ resource "datadog_monitor" "disk_space_forecast" {
|
||||
message = coalesce(var.disk_space_forecast_message, var.message)
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOQ
|
||||
query = <<EOQ
|
||||
${var.disk_space_forecast_time_aggregator}(${var.disk_space_forecast_timeframe}):
|
||||
forecast(avg:system.disk.in_use${module.filter-tags-disk.query_alert} by {host,device} * 100,
|
||||
'${var.disk_space_forecast_algorithm}',
|
||||
${var.disk_space_forecast_deviations},
|
||||
interval='${var.disk_space_forecast_interval}',
|
||||
${var.disk_space_forecast_algorithm == "linear" ? format(
|
||||
"history='%s',model='%s'",
|
||||
var.disk_space_forecast_linear_history,
|
||||
var.disk_space_forecast_linear_model,
|
||||
) : ""}
|
||||
"history='%s',model='%s'",
|
||||
var.disk_space_forecast_linear_history,
|
||||
var.disk_space_forecast_linear_model,
|
||||
) : ""}
|
||||
${var.disk_space_forecast_algorithm == "seasonal" ? format(
|
||||
"seasonality='%s'",
|
||||
var.disk_space_forecast_seasonal_seasonality,
|
||||
"seasonality='%s'",
|
||||
var.disk_space_forecast_seasonal_seasonality,
|
||||
) : ""}
|
||||
)
|
||||
>= ${var.disk_space_forecast_threshold_critical}
|
||||
EOQ
|
||||
|
||||
thresholds = {
|
||||
thresholds = {
|
||||
critical_recovery = var.disk_space_forecast_threshold_critical_recovery
|
||||
critical = var.disk_space_forecast_threshold_critical
|
||||
}
|
||||
}
|
||||
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_audit = false
|
||||
locked = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
require_full_window = true
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
evaluation_delay = var.evaluation_delay
|
||||
new_host_delay = var.new_host_delay
|
||||
notify_audit = false
|
||||
locked = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
require_full_window = true
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
|
||||
tags = concat(["env:${var.environment}", "type:system", "provider:disk", "resource:generic", "team:claranet", "created-by:terraform"], var.disk_space_forecast_extra_tags)
|
||||
tags = concat(["env:${var.environment}", "type:system", "provider:disk", "resource:generic", "team:claranet", "created-by:terraform"], var.disk_space_forecast_extra_tags)
|
||||
|
||||
lifecycle {
|
||||
lifecycle {
|
||||
ignore_changes = ["silenced"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "disk_inodes" {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user