MON-273 update existing monitors with new filter-tags module

Quentin Manfroi 2018-08-14 10:38:46 +02:00
parent 995ccd0ca4
commit 45f2ee3eff
38 changed files with 274 additions and 233 deletions

cloud/aws/alb/modules.tf Normal file (+8)

@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "alb"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}
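
The common/filter-tags module that every file below now instantiates is not itself part of this diff. A minimal sketch of what it presumably looks like, inferred from the inputs above and from the query_alert / service_check outputs the monitors consume (Terraform 0.11 syntax; the dd_%s default-tag scheme and the exact output quoting are assumptions):

# common/filter-tags/main.tf (hypothetical sketch, not the module's actual source)
variable "environment" {}
variable "resource" {}

variable "filter_tags_use_defaults" {
  default = "true"
}

variable "filter_tags_custom" {
  default = ""
}

variable "extra_tags" {
  type    = "list"
  default = []
}

locals {
  # Assumed naming scheme: the removed inline filters used per-resource tags
  # such as dd_aws_alb:enabled, so the real resource-to-tag mapping may differ.
  defaults = "${format("dd_monitoring:enabled,dd_%s:enabled,env:%s", var.resource, var.environment)}"
  base     = "${var.filter_tags_use_defaults == "true" ? local.defaults : var.filter_tags_custom}"
  tags     = "${join(",", concat(list(local.base), var.extra_tags))}"
}

# Braces rendered by the module, so metric queries can write:
# sum:aws.applicationelb.healthy_host_count${module.filter-tags.query_alert}
output "query_alert" {
  value = "{${local.tags}}"
}

# Parenthesized, quoted tag list for service checks: .over("tag1","tag2",...)
output "service_check" {
  value = "(\"${join("\",\"", split(",", local.tags))}\")"
}

Rendering the braces and quotes inside the module is what lets the monitor queries in the hunks below drop their literal {...} and ("...") wrappers.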


@ -1,13 +1,3 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ?
format("dd_monitoring:enabled,dd_aws_alb:enabled,env:%s", var.environment) :
"${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "ALB_no_healthy_instances" {
name = "[${var.environment}] ALB no healthy instances"
type = "metric alert"
@ -15,7 +5,7 @@ resource "datadog_monitor" "ALB_no_healthy_instances" {
query = <<EOF
${var.alb_no_healthy_instances_time_aggregator}(${var.alb_no_healthy_instances_timeframe}): (
sum:aws.applicationelb.healthy_host_count{${data.template_file.filter.rendered}} by {region,loadbalancer}
sum:aws.applicationelb.healthy_host_count${module.filter-tags.query_alert} by {region,loadbalancer}
) < 1
EOF
@ -44,7 +34,7 @@ resource "datadog_monitor" "ALB_latency" {
query = <<EOF
${var.latency_time_aggregator}(${var.latency_timeframe}): (
avg:aws.applicationelb.target_response_time.average{${data.template_file.filter.rendered}} by {region,loadbalancer}
avg:aws.applicationelb.target_response_time.average${module.filter-tags.query_alert} by {region,loadbalancer}
) > ${var.latency_threshold_critical}
EOF
@ -75,8 +65,8 @@ resource "datadog_monitor" "ALB_httpcode_5xx" {
query = <<EOF
sum(${var.httpcode_alb_5xx_timeframe}): (
default(
avg:aws.applicationelb.httpcode_alb_5xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
(avg:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
avg:aws.applicationelb.httpcode_alb_5xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() /
(avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
0) * 100
) > ${var.httpcode_alb_5xx_threshold_critical}
EOF
@ -108,8 +98,8 @@ resource "datadog_monitor" "ALB_httpcode_4xx" {
query = <<EOF
sum(${var.httpcode_alb_4xx_timeframe}): (
default(
avg:aws.applicationelb.httpcode_alb_4xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
(avg:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
avg:aws.applicationelb.httpcode_alb_4xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() /
(avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
0) * 100
) > ${var.httpcode_alb_4xx_threshold_critical}
EOF
@ -141,8 +131,8 @@ resource "datadog_monitor" "ALB_httpcode_target_5xx" {
query = <<EOF
sum(${var.httpcode_target_5xx_timeframe}): (
default(
avg:aws.applicationelb.httpcode_target_5xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
(avg:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
avg:aws.applicationelb.httpcode_target_5xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() /
(avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
0) * 100
) > ${var.httpcode_target_5xx_threshold_critical}
EOF
@ -174,8 +164,8 @@ resource "datadog_monitor" "ALB_httpcode_target_4xx" {
query = <<EOF
sum(${var.httpcode_target_4xx_timeframe}): (
default(
avg:aws.applicationelb.httpcode_target_4xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
(avg:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
avg:aws.applicationelb.httpcode_target_4xx${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() /
(avg:aws.applicationelb.request_count${module.filter-tags.query_alert} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
0) * 100
) > ${var.httpcode_target_4xx_threshold_critical}
EOF


@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "apigateway"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "elasticsearch"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -1,11 +1,3 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_es:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
### Elasticsearch cluster status monitor ###
/* Note about the query
- If aws.es.cluster_statusred is 1 --> query value (= 2.1) > 2 : critical
@ -19,8 +11,8 @@ resource "datadog_monitor" "es_cluster_status" {
query = <<EOF
max(${var.es_cluster_status_timeframe}): (
avg:aws.es.cluster_statusred{${data.template_file.filter.rendered}} by {region,name} * 2 +
(avg:aws.es.cluster_statusyellow{${data.template_file.filter.rendered}} by {region,name} + 0.1)
avg:aws.es.cluster_statusred${module.filter-tags.query_alert} by {region,name} * 2 +
(avg:aws.es.cluster_statusyellow${module.filter-tags.query_alert} by {region,name} + 0.1)
) >= 2
EOF
@ -53,7 +45,7 @@ resource "datadog_monitor" "es_free_space_low" {
query = <<EOF
${var.diskspace_time_aggregator}(${var.diskspace_timeframe}): (
avg:aws.es.free_storage_space{${data.template_file.filter.rendered}} by {region,name} /
avg:aws.es.free_storage_space${module.filter-tags.query_alert} by {region,name} /
(${var.es_cluster_volume_size}*1000) * 100
) < ${var.diskspace_threshold_critical}
EOF
@ -87,7 +79,7 @@ resource "datadog_monitor" "es_cpu_90_15min" {
query = <<EOF
${var.cpu_time_aggregator}(${var.cpu_timeframe}): (
avg:aws.es.cpuutilization{${data.template_file.filter.rendered}} by {region,name}
avg:aws.es.cpuutilization${module.filter-tags.query_alert} by {region,name}
) > ${var.cpu_threshold_critical}
EOF

cloud/aws/elb/modules.tf Normal file (+8)

@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "elb"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -1,18 +1,10 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_elb:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "ELB_no_healthy_instances" {
name = "[${var.environment}] ELB no healthy instances"
message = "${coalesce(var.elb_no_healthy_instance_message, var.message)}"
query = <<EOF
${var.elb_no_healthy_instance_time_aggregator}(${var.elb_no_healthy_instance_timeframe}): (
sum:aws.elb.healthy_host_count{${data.template_file.filter.rendered}} by {region,loadbalancername}
sum:aws.elb.healthy_host_count${module.filter-tags.query_alert} by {region,loadbalancername}
) < 1
EOF
@ -40,8 +32,8 @@ resource "datadog_monitor" "ELB_too_much_4xx" {
query = <<EOF
sum(${var.elb_4xx_timeframe}): (
default(
avg:aws.elb.httpcode_elb_4xx{${data.template_file.filter.rendered}} by {region,loadbalancername}.as_count() /
(avg:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername}.as_count() + ${var.artificial_requests_count}),
avg:aws.elb.httpcode_elb_4xx${module.filter-tags.query_alert} by {region,loadbalancername}.as_count() /
(avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername}.as_count() + ${var.artificial_requests_count}),
0) * 100
) > ${var.elb_4xx_threshold_critical}
EOF
@ -75,8 +67,8 @@ resource "datadog_monitor" "ELB_too_much_5xx" {
query = <<EOF
sum(${var.elb_5xx_timeframe}): (
default(
avg:aws.elb.httpcode_elb_5xx{${data.template_file.filter.rendered}} by {region,loadbalancername} /
(avg:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername} + ${var.artificial_requests_count}),
avg:aws.elb.httpcode_elb_5xx${module.filter-tags.query_alert} by {region,loadbalancername} /
(avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername} + ${var.artificial_requests_count}),
0) * 100
) > ${var.elb_5xx_threshold_critical}
EOF
@ -110,8 +102,8 @@ resource "datadog_monitor" "ELB_too_much_4xx_backend" {
query = <<EOF
sum(${var.elb_backend_4xx_timeframe}): (
default(
avg:aws.elb.httpcode_backend_4xx{${data.template_file.filter.rendered}} by {region,loadbalancername} /
(avg:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername} + ${var.artificial_requests_count}),
avg:aws.elb.httpcode_backend_4xx${module.filter-tags.query_alert} by {region,loadbalancername} /
(avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername} + ${var.artificial_requests_count}),
0) * 100
) > ${var.elb_backend_4xx_threshold_critical}
EOF
@ -145,8 +137,8 @@ resource "datadog_monitor" "ELB_too_much_5xx_backend" {
query = <<EOF
sum(${var.elb_backend_5xx_timeframe}): (
default(
avg:aws.elb.httpcode_backend_5xx{${data.template_file.filter.rendered}} by {region,loadbalancername} /
(avg:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername} + ${var.artificial_requests_count}),
avg:aws.elb.httpcode_backend_5xx${module.filter-tags.query_alert} by {region,loadbalancername} /
(avg:aws.elb.request_count${module.filter-tags.query_alert} by {region,loadbalancername} + ${var.artificial_requests_count}),
0) * 100
) > ${var.elb_backend_5xx_threshold_critical}
EOF
@ -179,7 +171,7 @@ resource "datadog_monitor" "ELB_backend_latency" {
query = <<EOF
${var.elb_backend_latency_time_aggregator}(${var.elb_backend_latency_timeframe}): (
avg:aws.elb.latency{${data.template_file.filter.rendered}} by {region,loadbalancername}
avg:aws.elb.latency${module.filter-tags.query_alert} by {region,loadbalancername}
) > ${var.elb_backend_latency_critical}
EOF


@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "kinesis-firehose"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -1,11 +1,3 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_firehose:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
### Kinesis Firehose Incoming records ###
resource "datadog_monitor" "firehose_incoming_records" {
name = "[${var.environment}] Kinesis Firehose No incoming records"
@ -15,7 +7,7 @@ resource "datadog_monitor" "firehose_incoming_records" {
query = <<EOF
sum(${var.incoming_records_timeframe}): (
avg:aws.firehose.incoming_records{${data.template_file.filter.rendered}} by {region,deliverystreamname}
avg:aws.firehose.incoming_records${module.filter-tags.query_alert} by {region,deliverystreamname}
) <= 0
EOF

cloud/aws/rds/modules.tf Normal file (+8)

@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "rds"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -1,11 +1,3 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_rds:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
### RDS instance CPU monitor ###
resource "datadog_monitor" "rds_cpu_90_15min" {
name = "[${var.environment}] RDS instance CPU high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
@ -15,7 +7,7 @@ resource "datadog_monitor" "rds_cpu_90_15min" {
query = <<EOF
${var.cpu_time_aggregator}(${var.cpu_timeframe}): (
avg:aws.rds.cpuutilization{${data.template_file.filter.rendered}} by {region,name}
avg:aws.rds.cpuutilization${module.filter-tags.query_alert} by {region,name}
) > ${var.cpu_threshold_critical}
EOF
@ -47,8 +39,8 @@ resource "datadog_monitor" "rds_free_space_low" {
query = <<EOF
${var.diskspace_time_aggregator}(${var.diskspace_timeframe}): (
avg:aws.rds.free_storage_space{${data.template_file.filter.rendered}} by {region,name} /
avg:aws.rds.total_storage_space{${data.template_file.filter.rendered}} by {region,name} * 100
avg:aws.rds.free_storage_space${module.filter-tags.query_alert} by {region,name} /
avg:aws.rds.total_storage_space${module.filter-tags.query_alert} by {region,name} * 100
) < ${var.diskspace_threshold_critical}
EOF

cloud/aws/vpn/modules.tf Normal file (+8)

@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "vpn"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "apimanagement"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -1,19 +1,9 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ?
format("dd_monitoring:enabled,dd_azure_apimanagement:enabled,env:%s", var.environment) :
"${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "apimgt_status" {
name = "[${var.environment}] API Management is down"
message = "${coalesce(var.status_message, var.message)}"
query = <<EOF
${var.status_time_aggregator}(${var.status_timeframe}):avg:azure.apimanagement_service.status{${data.template_file.filter.rendered}} by {resource_group,region,name} < 1
${var.status_time_aggregator}(${var.status_timeframe}):avg:azure.apimanagement_service.status${module.filter-tags.query_alert} by {resource_group,region,name} < 1
EOF
type = "metric alert"
@ -43,8 +33,8 @@ resource "datadog_monitor" "apimgt_failed_requests" {
query = <<EOF
sum(${var.failed_requests_timeframe}): (
avg:azure.apimanagement_service.failed_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.apimanagement_service.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() * 100
avg:azure.apimanagement_service.failed_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() /
avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() * 100
) > ${var.failed_requests_threshold_critical}
EOF
@ -75,8 +65,8 @@ resource "datadog_monitor" "apimgt_other_requests" {
query = <<EOF
sum(${var.other_requests_timeframe}): (
avg:azure.apimanagement_service.other_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.apimanagement_service.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() * 100
avg:azure.apimanagement_service.other_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() /
avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() * 100
) > ${var.other_requests_threshold_critical}
EOF
@ -107,8 +97,8 @@ resource "datadog_monitor" "apimgt_unauthorized_requests" {
query = <<EOF
sum(${var.unauthorized_requests_timeframe}): (
avg:azure.apimanagement_service.unauthorized_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.apimanagement_service.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() * 100
avg:azure.apimanagement_service.unauthorized_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() /
avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() * 100
) > ${var.unauthorized_requests_threshold_critical}
EOF
@ -139,8 +129,8 @@ resource "datadog_monitor" "apimgt_successful_requests" {
query = <<EOF
sum(${var.successful_requests_timeframe}): (
avg:azure.apimanagement_service.successful_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.apimanagement_service.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() * 100
avg:azure.apimanagement_service.successful_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() /
avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() * 100
) < ${var.successful_requests_threshold_critical}
EOF


@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "app-services"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -1,11 +1,3 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
# Monitoring App Services response time
resource "datadog_monitor" "appservices_response_time" {
name = "[${var.environment}] App Services response time too high {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}"
@ -14,7 +6,7 @@ resource "datadog_monitor" "appservices_response_time" {
query = <<EOF
${var.response_time_time_aggregator}(${var.response_time_timeframe}): (
avg:azure.app_services.average_response_time{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.app_services.average_response_time${module.filter-tags.query_alert} by {resource_group,region,name}
) > ${var.response_time_threshold_critical}
EOF
@ -45,7 +37,7 @@ resource "datadog_monitor" "appservices_memory_usage_count" {
query = <<EOF
${var.memory_usage_time_aggregator}(${var.memory_usage_timeframe}): (
avg:azure.app_services.memory_working_set{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.app_services.memory_working_set${module.filter-tags.query_alert} by {resource_group,region,name}
) > ${var.memory_usage_threshold_critical}
EOF
@ -76,8 +68,8 @@ resource "datadog_monitor" "appservices_http_5xx_errors_count" {
query = <<EOF
sum(${var.http_5xx_requests_timeframe}): (
avg:azure.app_services.http5xx{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.app_services.requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
avg:azure.app_services.http5xx${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() /
avg:azure.app_services.requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
) * 100 > ${var.http_5xx_requests_threshold_critical}
EOF
@ -108,8 +100,8 @@ resource "datadog_monitor" "appservices_http_4xx_errors_count" {
query = <<EOF
sum(${var.http_4xx_requests_timeframe}): (
avg:azure.app_services.http4xx{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.app_services.requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
avg:azure.app_services.http4xx${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() /
avg:azure.app_services.requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
) * 100 > ${var.http_4xx_requests_threshold_critical}
EOF
@ -140,9 +132,9 @@ resource "datadog_monitor" "appservices_http_success_status_rate" {
query = <<EOF
sum(${var.http_successful_requests_timeframe}): (
(avg:azure.app_services.http2xx{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
avg:azure.app_services.http3xx{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()) /
avg:azure.app_services.requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
(avg:azure.app_services.http2xx${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() +
avg:azure.app_services.http3xx${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()) /
avg:azure.app_services.requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
) * 100 < ${var.http_successful_requests_threshold_critical}
EOF


@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "eventhub"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -1,18 +1,10 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_eventhub:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "eventhub_status" {
name = "[${var.environment}] Event Hub is down"
message = "${coalesce(var.status_message, var.message)}"
query = <<EOF
${var.status_time_aggregator}(${var.status_timeframe}): (
avg:azure.eventhub_namespaces.status{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.eventhub_namespaces.status${module.filter-tags.query_alert} by {resource_group,region,name}
) != 1
EOF
@ -40,8 +32,8 @@ resource "datadog_monitor" "eventhub_failed_requests" {
query = <<EOF
sum(${var.failed_requests_rate_timeframe}): (
default(
avg:azure.eventhub_namespaces.failed_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.eventhub_namespaces.incoming_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count(),
avg:azure.eventhub_namespaces.failed_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() /
avg:azure.eventhub_namespaces.incoming_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count(),
0) * 100
) > ${var.failed_requests_rate_thresold_critical}
EOF
@ -76,11 +68,11 @@ resource "datadog_monitor" "eventhub_errors" {
sum(${var.errors_rate_timeframe}): (
default(
(
avg:azure.eventhub_namespaces.internal_server_errors{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
avg:azure.eventhub_namespaces.server_busy_errors{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
avg:azure.eventhub_namespaces.other_errors{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
avg:azure.eventhub_namespaces.internal_server_errors${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() +
avg:azure.eventhub_namespaces.server_busy_errors${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() +
avg:azure.eventhub_namespaces.other_errors${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
) / (
avg:azure.eventhub_namespaces.incoming_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
avg:azure.eventhub_namespaces.incoming_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
),
0) * 100
) > ${var.errors_rate_thresold_critical}


@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "iothubs"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "redis"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -1,18 +1,10 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_redis:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "status" {
name = "[${var.environment}] Redis {{name}} is down"
message = "${coalesce(var.status_message, var.message)}"
query = <<EOF
${var.status_time_aggregator}(${var.status_timeframe}): (
avg:azure.cache_redis.status{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.cache_redis.status${module.filter-tags.query_alert} by {resource_group,region,name}
) != 1
EOF
@ -39,7 +31,7 @@ resource "datadog_monitor" "evictedkeys" {
query = <<EOF
${var.evictedkeys_limit_time_aggregator}(${var.evictedkeys_limit_timeframe}): (
avg:azure.cache_redis.evictedkeys{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.cache_redis.evictedkeys${module.filter-tags.query_alert} by {resource_group,region,name}
) > ${var.evictedkeys_limit_threshold_critical}
EOF
@ -71,7 +63,7 @@ resource "datadog_monitor" "percent_processor_time" {
query = <<EOF
${var.percent_processor_time_time_aggregator}(${var.percent_processor_time_timeframe}): (
avg:azure.cache_redis.percent_processor_time{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.cache_redis.percent_processor_time${module.filter-tags.query_alert} by {resource_group,region,name}
) > ${var.percent_processor_time_threshold_critical}
EOF
@ -103,7 +95,7 @@ resource "datadog_monitor" "server_load" {
query = <<EOF
${var.server_load_rate_time_aggregator}(${var.server_load_rate_timeframe}): (
avg:azure.cache_redis.server_load{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.cache_redis.server_load${module.filter-tags.query_alert} by {resource_group,region,name}
) > ${var.server_load_rate_threshold_critical}
EOF


@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "servicebus"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -1,18 +1,10 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_servicebus:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "servicebus_status" {
name = "[${var.environment}] Service Bus is down"
message = "${coalesce(var.status_message, var.message)}"
query = <<EOF
${var.status_time_aggregator}(${var.status_timeframe}): (
avg:azure.servicebus_namespaces.status{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.servicebus_namespaces.status${module.filter-tags.query_alert} by {resource_group,region,name}
) != 1
EOF


@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "sql-database"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -1,18 +1,10 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_sqldatabase:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "sql-database_cpu_90_15min" {
name = "[${var.environment}] SQL Database CPU too high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
message = "${coalesce(var.cpu_message, var.message)}"
query = <<EOF
${var.cpu_time_aggregator}(${var.cpu_timeframe}): (
avg:azure.sql_servers_databases.cpu_percent{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.sql_servers_databases.cpu_percent${module.filter-tags.query_alert} by {resource_group,region,name}
) > ${var.cpu_threshold_critical}
EOF
@ -45,7 +37,7 @@ resource "datadog_monitor" "sql-database_free_space_low" {
query = <<EOF
${var.diskspace_time_aggregator}(${var.diskspace_timeframe}): (
avg:azure.sql_servers_databases.storage_percent{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.sql_servers_databases.storage_percent${module.filter-tags.query_alert} by {resource_group,region,name}
) > ${var.diskspace_threshold_critical}
EOF
@ -77,7 +69,7 @@ resource "datadog_monitor" "sql-database_dtu_consumption_high" {
query = <<EOF
${var.dtu_time_aggregator}(${var.dtu_timeframe}): (
avg:azure.sql_servers_databases.dtu_consumption_percent{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.sql_servers_databases.dtu_consumption_percent${module.filter-tags.query_alert} by {resource_group,region,name}
) > ${var.dtu_threshold_critical}
EOF
@ -109,7 +101,7 @@ resource "datadog_monitor" "sql-database_deadlocks_count" {
query = <<EOF
sum(${var.deadlock_timeframe}): (
avg:azure.sql_servers_databases.deadlock{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
avg:azure.sql_servers_databases.deadlock${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
) > ${var.deadlock_threshold_critical}
EOF


@ -0,0 +1,9 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "storage"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
extra_tags = ["transaction_type:all"]
}
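
Note the extra_tags input: it folds transaction_type:all, previously appended inline in every storage query, into the rendered filter. Assuming the sketch above and a hypothetical env of prod, query_alert would render roughly as:

{dd_monitoring:enabled,dd_azure_storage:enabled,env:prod,transaction_type:all}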


@ -1,18 +1,10 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "availability" {
name = "[${var.environment}] Azure Storage is down"
message = "${coalesce(var.availability_message, var.message)}"
query = <<EOF
${var.availability_time_aggregator}(${var.availability_timeframe}): (default(
avg:azure.storage.availability{${data.template_file.filter.rendered},transaction_type:all} by {resource_group,storage_type,name},
avg:azure.storage.availability${module.filter-tags.query_alert} by {resource_group,storage_type,name},
100)) < ${var.availability_threshold_critical}
EOF
@ -43,7 +35,7 @@ resource "datadog_monitor" "successful_requests" {
query = <<EOF
${var.successful_requests_time_aggregator}(${var.successful_requests_timeframe}): (default(
avg:azure.storage.percent_success{${data.template_file.filter.rendered},transaction_type:all} by {resource_group,storage_type,name},
avg:azure.storage.percent_success${module.filter-tags.query_alert} by {resource_group,storage_type,name},
100)) < ${var.successful_requests_threshold_critical}
EOF
@ -74,7 +66,7 @@ resource "datadog_monitor" "latency" {
query = <<EOF
${var.latency_time_aggregator}(${var.latency_timeframe}): (default(
avg:azure.storage.average_e2_e_latency{${data.template_file.filter.rendered},transaction_type:all} by {resource_group,storage_type,name},
avg:azure.storage.average_e2_e_latency${module.filter-tags.query_alert} by {resource_group,storage_type,name},
0)) > ${var.latency_threshold_critical}
EOF
@ -105,7 +97,7 @@ resource "datadog_monitor" "timeout_error_requests" {
query = <<EOF
${var.timeout_error_requests_time_aggregator}(${var.timeout_error_requests_timeframe}): (default(
avg:azure.storage.percent_timeout_error{${data.template_file.filter.rendered},transaction_type:all} by {resource_group,storage_type,name},
avg:azure.storage.percent_timeout_error${module.filter-tags.query_alert} by {resource_group,storage_type,name},
0)) > ${var.timeout_error_requests_threshold_critical}
EOF
@ -136,7 +128,7 @@ resource "datadog_monitor" "network_error_requests" {
query = <<EOF
${var.network_error_requests_time_aggregator}(${var.network_error_requests_timeframe}): (default(
avg:azure.storage.percent_network_error{${data.template_file.filter.rendered},transaction_type:all} by {resource_group,storage_type,name},
avg:azure.storage.percent_network_error${module.filter-tags.query_alert} by {resource_group,storage_type,name},
0)) > ${var.network_error_requests_threshold_critical}
EOF
@ -167,7 +159,7 @@ resource "datadog_monitor" "throttling_error_requests" {
query = <<EOF
${var.throttling_error_requests_time_aggregator}(${var.throttling_error_requests_timeframe}): (default(
avg:azure.storage.percent_throttling_error{${data.template_file.filter.rendered},transaction_type:all} by {resource_group,storage_type,name},
avg:azure.storage.percent_throttling_error${module.filter-tags.query_alert} by {resource_group,storage_type,name},
0)) > ${var.throttling_error_requests_threshold_critical}
EOF
@ -198,7 +190,7 @@ resource "datadog_monitor" "server_other_error_requests" {
query = <<EOF
${var.server_other_error_requests_time_aggregator}(${var.server_other_error_requests_timeframe}): (default(
avg:azure.storage.percent_server_other_error{${data.template_file.filter.rendered},transaction_type:all} by {resource_group,storage_type,name},
avg:azure.storage.percent_server_other_error${module.filter-tags.query_alert} by {resource_group,storage_type,name},
0)) > ${var.server_other_error_requests_threshold_critical}
EOF
@ -229,7 +221,7 @@ resource "datadog_monitor" "client_other_error_requests" {
query = <<EOF
${var.client_other_error_requests_time_aggregator}(${var.client_other_error_requests_timeframe}): (default(
avg:azure.storage.percent_client_other_error{${data.template_file.filter.rendered},transaction_type:all} by {resource_group,storage_type,name},
avg:azure.storage.percent_client_other_error${module.filter-tags.query_alert} by {resource_group,storage_type,name},
0)) > ${var.client_other_error_requests_threshold_critical}
EOF
@ -260,7 +252,7 @@ resource "datadog_monitor" "authorization_error_requests" {
query = <<EOF
${var.authorization_error_requests_time_aggregator}(${var.authorization_error_requests_timeframe}): (default(
avg:azure.storage.percent_authorization_error{${data.template_file.filter.rendered},transaction_type:all} by {resource_group,storage_type,name},
avg:azure.storage.percent_authorization_error${module.filter-tags.query_alert} by {resource_group,storage_type,name},
0)) > ${var.authorization_error_requests_threshold_critical}
EOF


@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "stream-analytics"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -1,18 +1,10 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_streamanalytics:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "status" {
name = "[${var.environment}] Stream Analytics is down"
message = "${coalesce(var.status_message, var.message)}"
query = <<EOF
${var.status_time_aggregator}(${var.status_timeframe}): (
avg:azure.streamanalytics_streamingjobs.status{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.streamanalytics_streamingjobs.status${module.filter-tags.query_alert} by {resource_group,region,name}
) < 1
EOF
@ -39,7 +31,7 @@ resource "datadog_monitor" "su_utilization" {
query = <<EOF
${var.su_utilization_time_aggregator}(${var.su_utilization_timeframe}): (
avg:azure.streamanalytics_streamingjobs.resource_utilization{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.streamanalytics_streamingjobs.resource_utilization${module.filter-tags.query_alert} by {resource_group,region,name}
) > ${var.su_utilization_threshold_critical}
EOF
@ -71,8 +63,8 @@ resource "datadog_monitor" "failed_function_requests" {
query = <<EOF
sum(${var.failed_function_requests_timeframe}): (
avg:azure.streamanalytics_streamingjobs.aml_callout_failed_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
avg:azure.streamanalytics_streamingjobs.aml_callout_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
avg:azure.streamanalytics_streamingjobs.aml_callout_failed_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() /
avg:azure.streamanalytics_streamingjobs.aml_callout_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
) * 100 > ${var.failed_function_requests_threshold_critical}
EOF
@ -104,7 +96,7 @@ resource "datadog_monitor" "conversion_errors" {
query = <<EOF
${var.conversion_errors_time_aggregator}(${var.conversion_errors_timeframe}): (
avg:azure.streamanalytics_streamingjobs.conversion_errors{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.streamanalytics_streamingjobs.conversion_errors${module.filter-tags.query_alert} by {resource_group,region,name}
) > ${var.conversion_errors_threshold_critical}
EOF
@ -136,7 +128,7 @@ resource "datadog_monitor" "runtime_errors" {
query = <<EOF
${var.runtime_errors_time_aggregator}(${var.runtime_errors_timeframe}): (
avg:azure.streamanalytics_streamingjobs.errors{${data.template_file.filter.rendered}} by {resource_group,region,name}
avg:azure.streamanalytics_streamingjobs.errors${module.filter-tags.query_alert} by {resource_group,region,name}
) > ${var.runtime_errors_threshold_critical}
EOF


@ -0,0 +1,18 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "mongodb"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}
module "filter-tags-secondary" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "mongodb"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
extra_tags = ["replset_state:secondary"]
}
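
Terraform 0.11 cannot instantiate modules conditionally or with count, so each distinct tag set gets its own module instance; the replication-lag monitor below switches to filter-tags-secondary to keep the replset_state:secondary scoping that the old query appended inline.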


@ -1,18 +1,10 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_mongodb:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "mongodb_primary" {
name = "[${var.environment}] MongoDB primary state"
message = "${coalesce(var.mongodb_primary_message, var.message)}"
query = <<EOF
${var.mongodb_primary_aggregator}(${var.mongodb_primary_timeframe}):
min:mongodb.replset.state{${data.template_file.filter.rendered}} by {replset_name} >= 2
min:mongodb.replset.state${module.filter-tags.query_alert} by {replset_name} >= 2
EOF
type = "metric alert"
@ -38,7 +30,7 @@ resource "datadog_monitor" "mongodb_secondary" {
query = <<EOF
${var.mongodb_secondary_aggregator}(${var.mongodb_secondary_timeframe}):
${var.mongodb_desired_servers_count} -
sum:mongodb.replset.health{${data.template_file.filter.rendered}} by {replset_name}
sum:mongodb.replset.health${module.filter-tags.query_alert} by {replset_name}
> 1
EOF
@ -69,7 +61,7 @@ resource "datadog_monitor" "mongodb_server_count" {
query = <<EOF
${var.mongodb_server_count_aggregator}(${var.mongodb_server_count_timeframe}):
sum:mongodb.replset.health{${data.template_file.filter.rendered}} by {replset_name}
sum:mongodb.replset.health${module.filter-tags.query_alert} by {replset_name}
> 99
EOF
@ -100,7 +92,7 @@ resource "datadog_monitor" "mongodb_replication" {
query = <<EOF
${var.mongodb_replication_aggregator}(${var.mongodb_replication_timeframe}):
avg:mongodb.replset.replicationlag{${data.template_file.filter.rendered},replset_state:secondary} by {server} > ${var.mongodb_lag_critical}
avg:mongodb.replset.replicationlag${module.filter-tags-secondary.query_alert} by {server} > ${var.mongodb_lag_critical}
EOF
thresholds {


@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "apache"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -1,11 +1,3 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_apache:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "datadog_apache_process" {
name = "[${var.environment}] Can't connect to apache vhost status"
message = "${coalesce(var.apache_connect_message, var.message)}"
@ -13,7 +5,7 @@ resource "datadog_monitor" "datadog_apache_process" {
type = "service check"
query = <<EOF
"apache.can_connect".over("${data.template_file.filter.rendered}").by("host","port").last(6).count_by_status()
"apache.can_connect".over${module.filter-tags.service_check}.by("host","port").last(6).count_by_status()
EOF
thresholds = {


@ -0,0 +1,8 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "nginx"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}


@ -1,11 +1,3 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_nginx:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "datadog_nginx_process" {
name = "[${var.environment}] Can't connect to nginx vhost status"
message = "${coalesce(var.nginx_connect_message, var.message)}"
@ -13,7 +5,7 @@ resource "datadog_monitor" "datadog_nginx_process" {
type = "service check"
query = <<EOF
"nginx.can_connect".over("${data.template_file.filter.rendered}").by("host","port").last(6).count_by_status()
"nginx.can_connect".over${module.filter-tags.service_check}.by("host","port").last(6).count_by_status()
EOF
thresholds = {


@ -2,7 +2,7 @@ module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "php_fpm"
resource = "php-fpm"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}

system/generic/modules.tf Normal file (+18)

@ -0,0 +1,18 @@
module "filter-tags" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "generic"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
}
module "filter-tags-disk" {
source = "../../common/filter-tags"
environment = "${var.environment}"
resource = "generic"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}"
extra_tags = ["dd_disk:enabled"]
}


@ -1,18 +1,10 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_system:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "datadog_cpu_too_high" {
name = "[${var.environment}] CPU usage {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
message = "${coalesce(var.cpu_high_message, var.message)}"
query = <<EOF
${var.cpu_high_time_aggregator}(${var.cpu_high_timeframe}): (
100 - avg:system.cpu.idle{${data.template_file.filter.rendered}} by {region,host}
100 - avg:system.cpu.idle${module.filter-tags.query_alert} by {region,host}
) > ${var.cpu_high_threshold_critical}
EOF
@ -43,8 +35,8 @@ resource "datadog_monitor" "datadog_load_too_high" {
query = <<EOF
${var.cpu_load_time_aggregator}(${var.cpu_load_timeframe}): (
avg:system.load.5{${data.template_file.filter.rendered}} by {region,host} /
avg:system.core.count{${data.template_file.filter.rendered}} by {region,host}
avg:system.load.5${module.filter-tags.query_alert} by {region,host} /
avg:system.core.count${module.filter-tags.query_alert} by {region,host}
) > ${var.cpu_load_threshold_critical}
EOF
@ -75,8 +67,8 @@ resource "datadog_monitor" "datadog_free_disk_space_too_low" {
query = <<EOF
${var.free_disk_space_time_aggregator}(${var.free_disk_space_timeframe}): (
avg:system.disk.free{${data.template_file.filter.rendered},dd_disk:enabled} by {region,host,device} /
avg:system.disk.total{${data.template_file.filter.rendered},dd_disk:enabled} by {region,host,device} * 100
avg:system.disk.free${module.filter-tags-disk.query_alert} by {region,host,device} /
avg:system.disk.total${module.filter-tags-disk.query_alert} by {region,host,device} * 100
) < ${var.free_disk_space_threshold_critical}
EOF
@ -107,8 +99,8 @@ resource "datadog_monitor" "datadog_free_disk_space_inodes_too_low" {
query = <<EOF
${var.free_disk_inodes_time_aggregator}(${var.free_disk_inodes_timeframe}): (
avg:system.fs.inodes.free{${data.template_file.filter.rendered},dd_disk:enabled} by {region,host,device} /
avg:system.fs.inodes.total{${data.template_file.filter.rendered},dd_disk:enabled} by {region,host,device} * 100
avg:system.fs.inodes.free${module.filter-tags-disk.query_alert} by {region,host,device} /
avg:system.fs.inodes.total${module.filter-tags-disk.query_alert} by {region,host,device} * 100
) < ${var.free_disk_inodes_threshold_critical}
EOF
@ -139,8 +131,8 @@ resource "datadog_monitor" "datadog_free_memory" {
query = <<EOF
${var.free_memory_time_aggregator}(${var.free_memory_timeframe}): (
avg:system.mem.usable{${data.template_file.filter.rendered}} by {region,host} /
avg:system.mem.total{${data.template_file.filter.rendered}} by {region,host} * 100
avg:system.mem.usable${module.filter-tags.query_alert} by {region,host} /
avg:system.mem.total${module.filter-tags.query_alert} by {region,host} * 100
) < ${var.free_memory_threshold_critical}
EOF