Merged in MON-227-monitors-for-gcp-lb (pull request #119)
MON-227 monitors for gcp lb Approved-by: Rafael Romero Carmona <rafael.romero.carmona@fr.clara.net> Approved-by: Quentin Manfroi <quentin.manfroi@yahoo.fr> Approved-by: Alexandre Gaillet <alexandre.gaillet@fr.clara.net> Approved-by: Jean-Philippe LAINÉ <jean-philippe.laine@fr.clara.net>
This commit is contained in:
commit
198baab540
@ -96,6 +96,7 @@ The `//` is very important, it's a terraform specific syntax used to separate gi
|
||||
- [cloud-sql](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/)
|
||||
- [common](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/common/)
|
||||
- [mysql](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/mysql/)
|
||||
- [lb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/lb/)
|
||||
- [pubsub](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/pubsub/)
|
||||
- [common](https://bitbucket.org/morea/terraform.feature.datadog/src/master/common/)
|
||||
- [alerting-message](https://bitbucket.org/morea/terraform.feature.datadog/src/master/common/alerting-message/)
|
||||
|
||||
84
cloud/gcp/lb/README.md
Normal file
84
cloud/gcp/lb/README.md
Normal file
@ -0,0 +1,84 @@
|
||||
# CLOUD GCP LB DataDog monitors
|
||||
|
||||
## How to use this module
|
||||
|
||||
```
|
||||
module "datadog-monitors-cloud-gcp-lb" {
|
||||
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/lb?ref={revision}"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${module.datadog-message-alerting.alerting-message}"
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
## Purpose
|
||||
|
||||
Creates DataDog monitors with the following checks:
|
||||
|
||||
- GCP LB 4xx errors
|
||||
- GCP LB 5xx errors
|
||||
- GCP LB bucket backend latency
|
||||
- GCP LB Requests count increased abruptly
|
||||
- GCP LB service backend latency
|
||||
|
||||
## Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| backend_latency_bucket_extra_tags | Extra tags for GCP LB Backend Latency monitor | list | `<list>` | no |
|
||||
| backend_latency_bucket_message | Custom message for the GCP LB Backend Latency monitor | string | `` | no |
|
||||
| backend_latency_bucket_silenced | Groups to mute for GCP LB Backend Latency monitor | map | `<map>` | no |
|
||||
| backend_latency_bucket_threshold_critical | Latency in milliseconds (critical threshold) | string | `8000` | no |
|
||||
| backend_latency_bucket_threshold_warning | Latency in milliseconds (warning threshold) | string | `4000` | no |
|
||||
| backend_latency_bucket_time_aggregator | Time aggregator for the GCP LB Backend Latency monitor | string | `min` | no |
|
||||
| backend_latency_bucket_timeframe | Timeframe for the GCP LB Backend Latency monitor | string | `last_10m` | no |
|
||||
| backend_latency_service_extra_tags | Extra tags for GCP LB Backend Latency monitor | list | `<list>` | no |
|
||||
| backend_latency_service_message | Custom message for the GCP LB Backend Latency monitor | string | `` | no |
|
||||
| backend_latency_service_silenced | Groups to mute for GCP LB Backend Latency monitor | map | `<map>` | no |
|
||||
| backend_latency_service_threshold_critical | Latency in milliseconds (critical threshold) | string | `1500` | no |
|
||||
| backend_latency_service_threshold_warning | Latency in milliseconds (warning threshold) | string | `1000` | no |
|
||||
| backend_latency_service_time_aggregator | Time aggregator for the GCP LB Backend Latency monitor | string | `min` | no |
|
||||
| backend_latency_service_timeframe | Timeframe for the GCP LB Backend Latency monitor | string | `last_10m` | no |
|
||||
| environment | Architecture environment | string | - | yes |
|
||||
| error_rate_4xx_artificial_request | Divisor Delta for the GCP LB 4XX Errors monitor | string | `5` | no |
|
||||
| error_rate_4xx_extra_tags | Extra tags for GCP LB 4XX Errors monitor | list | `<list>` | no |
|
||||
| error_rate_4xx_message | Custom message for the GCP LB 4XX Errors monitor | string | `` | no |
|
||||
| error_rate_4xx_silenced | Groups to mute for GCP LB 4XX Errors monitor | map | `<map>` | no |
|
||||
| error_rate_4xx_threshold_critical | Rate error in percentage (critical threshold) | string | `50` | no |
|
||||
| error_rate_4xx_time_aggregator | Time aggregator for the GCP LB 4XX Errors monitor | string | `sum` | no |
|
||||
| error_rate_4xx_timeframe | Timeframe for the GCP LB 4XX Errors monitor | string | `last_5m` | no |
|
||||
| error_rate_5xx_artificial_request | Divisor Delta for the GCP LB 5XX Errors monitor | string | `5` | no |
|
||||
| error_rate_5xx_extra_tags | Extra tags for GCP LB 5XX Errors monitor | list | `<list>` | no |
|
||||
| error_rate_5xx_message | Custom message for the GCP LB 5XX Errors monitor | string | `` | no |
|
||||
| error_rate_5xx_silenced | Groups to mute for GCP LB 5XX Errors monitor | map | `<map>` | no |
|
||||
| error_rate_5xx_threshold_critical | Rate error in percentage (critical threshold) | string | `50` | no |
|
||||
| error_rate_5xx_time_aggregator | Time aggregator for the GCP LB 5XX Errors monitor | string | `sum` | no |
|
||||
| error_rate_5xx_timeframe | Timeframe for the GCP LB 5XX Errors monitor | string | `last_5m` | no |
|
||||
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| filter_tags | Tags used for filtering | string | `*` | no |
|
||||
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||
| new_host_delay | Delay in seconds for the new host evaluation | string | `300` | no |
|
||||
| request_count_extra_tags | Extra tags for GCP LB Request Count monitor | list | `<list>` | no |
|
||||
| request_count_message | Custom message for the GCP LB Request Count monitor | string | `` | no |
|
||||
| request_count_silenced | Groups to mute for GCP LB Request Count monitor | map | `<map>` | no |
|
||||
| request_count_threshold_critical | Deviation in percentage (critical threshold) | string | `500` | no |
|
||||
| request_count_threshold_warning | Deviation in percentage (warning threshold) | string | `250` | no |
|
||||
| request_count_time_aggregator | Time aggregator for the GCP LB Request Count monitor | string | `sum` | no |
|
||||
| request_count_timeframe | Timeframe for the GCP LB Request Count monitor | string | `last_5m` | no |
|
||||
| request_count_timeshift | Timeshift for the GCP LB Request Count monitor | string | `last_5m` | no |
|
||||
|
||||
## Outputs
|
||||
|
||||
| Name | Description |
|
||||
|------|-------------|
|
||||
| backend_latency_bucket_id | id for monitor backend_latency_bucket |
|
||||
| backend_latency_service_id | id for monitor backend_latency_service |
|
||||
| error_rate_4xx_id | id for monitor error_rate_4xx |
|
||||
| error_rate_5xx_id | id for monitor error_rate_5xx |
|
||||
| request_count_id | id for monitor request_count |
|
||||
|
||||
## Related documentation
|
||||
|
||||
* [GCP LB Metrics](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-loadbalancing)
|
||||
* [Datadog GCP integration](https://docs.datadoghq.com/integrations/google_cloud_platform/)
|
||||
257
cloud/gcp/lb/inputs.tf
Normal file
257
cloud/gcp/lb/inputs.tf
Normal file
@ -0,0 +1,257 @@
|
||||
#
|
||||
# Datadog global variables
|
||||
#
|
||||
# Environment name; interpolated into every monitor title and into the
# "env:" tag of every monitor in this module. Required (no default).
variable "environment" {
  description = "Architecture environment"
  type        = "string"
}

# Tag filter interpolated inside the `{...}` scope of every metric query.
# The default "*" matches everything.
variable "filter_tags" {
  description = "Tags used for filtering"
  type        = "string"
  default     = "*"
}

# Default notification message. Each monitor uses its own `*_message`
# variable first and falls back to this one through coalesce() when that
# variable is empty. Required (no default).
variable "message" {
  description = "Message sent when a monitor is triggered"
  type        = "string"
}

# Passed unchanged to the `evaluation_delay` attribute of every monitor.
variable "evaluation_delay" {
  description = "Delay in seconds for the metric evaluation"
  type        = "string"
  default     = 900
}

# Passed unchanged to the `new_host_delay` attribute of every monitor.
variable "new_host_delay" {
  description = "Delay in seconds for the new host evaluation"
  type        = "string"
  default     = 300
}
|
||||
|
||||
#
|
||||
# 4XX Errors
|
||||
#
|
||||
# Monitor-specific message; when left empty the monitor falls back to
# var.message through coalesce().
variable "error_rate_4xx_message" {
  description = "Custom message for the GCP LB 4XX Errors monitor"
  type        = "string"
  default     = ""
}

# Time aggregation function placed in front of the evaluation window in the
# monitor query, i.e. `<time_aggregator>(<timeframe>): ...`.
variable "error_rate_4xx_time_aggregator" {
  description = "Time aggregator for the GCP LB 4XX Errors monitor"
  type        = "string"
  default     = "sum"
}

# Evaluation window of the monitor query.
variable "error_rate_4xx_timeframe" {
  description = "Timeframe for the GCP LB 4XX Errors monitor"
  type        = "string"
  default     = "last_5m"
}

# Constant added to the total request count (the divisor) in the 4xx
# error-rate query before the ratio is converted to a percentage.
variable "error_rate_4xx_artificial_request" {
  description = "Divisor Delta for the GCP LB 4XX Errors monitor"
  type        = "string"
  default     = 5
}

# Used both in the query comparison and as the monitor's critical threshold.
variable "error_rate_4xx_threshold_critical" {
  description = "Rate error in percentage (critical threshold)"
  type        = "string"
  default     = 50
}

# Passed unchanged to the monitor's `silenced` attribute.
variable "error_rate_4xx_silenced" {
  description = "Groups to mute for GCP LB 4XX Errors monitor"
  type        = "map"
  default     = {}
}

# Appended to the fixed tag list of the monitor.
variable "error_rate_4xx_extra_tags" {
  description = "Extra tags for GCP LB 4XX Errors monitor"
  type        = "list"
  default     = []
}
|
||||
|
||||
#
|
||||
# 5XX Errors
|
||||
#
|
||||
# Monitor-specific message; when left empty the monitor falls back to
# var.message through coalesce().
variable "error_rate_5xx_message" {
  description = "Custom message for the GCP LB 5XX Errors monitor"
  type        = "string"
  default     = ""
}

# Time aggregation function intended for the 5xx error-rate query,
# i.e. `<time_aggregator>(<timeframe>): ...`.
variable "error_rate_5xx_time_aggregator" {
  description = "Time aggregator for the GCP LB 5XX Errors monitor"
  type        = "string"
  default     = "sum"
}

# Evaluation window intended for the 5xx error-rate query.
variable "error_rate_5xx_timeframe" {
  description = "Timeframe for the GCP LB 5XX Errors monitor"
  type        = "string"
  default     = "last_5m"
}

# Constant intended to be added to the total request count (the divisor)
# of the 5xx error-rate query.
variable "error_rate_5xx_artificial_request" {
  description = "Divisor Delta for the GCP LB 5XX Errors monitor"
  type        = "string"
  default     = 5
}

# Critical threshold of the 5xx monitor, expressed as a percentage.
variable "error_rate_5xx_threshold_critical" {
  description = "Rate error in percentage (critical threshold)"
  type        = "string"
  default     = 50
}

# Passed unchanged to the monitor's `silenced` attribute.
variable "error_rate_5xx_silenced" {
  description = "Groups to mute for GCP LB 5XX Errors monitor"
  type        = "map"
  default     = {}
}

# Appended to the fixed tag list of the monitor.
variable "error_rate_5xx_extra_tags" {
  description = "Extra tags for GCP LB 5XX Errors monitor"
  type        = "list"
  default     = []
}
|
||||
|
||||
#
|
||||
# Latency Backend service
|
||||
#
|
||||
# Monitor-specific message; when left empty the monitor falls back to
# var.message through coalesce().
variable "backend_latency_service_message" {
  description = "Custom message for the GCP LB Backend Latency monitor"
  type        = "string"
  default     = ""
}

# Time aggregation function placed in front of the evaluation window in the
# monitor query, i.e. `<time_aggregator>(<timeframe>): ...`.
variable "backend_latency_service_time_aggregator" {
  description = "Time aggregator for the GCP LB Backend Latency monitor"
  type        = "string"
  default     = "min"
}

# Evaluation window of the monitor query.
variable "backend_latency_service_timeframe" {
  description = "Timeframe for the GCP LB Backend Latency monitor"
  type        = "string"
  default     = "last_10m"
}

# Warning threshold of the backend-service latency monitor.
variable "backend_latency_service_threshold_warning" {
  description = "Latency in milliseconds (warning threshold)"
  type        = "string"
  default     = 1000
}

# Used both in the query comparison and as the monitor's critical threshold.
variable "backend_latency_service_threshold_critical" {
  description = "Latency in milliseconds (critical threshold)"
  type        = "string"
  default     = 1500
}

# Passed unchanged to the monitor's `silenced` attribute.
variable "backend_latency_service_silenced" {
  description = "Groups to mute for GCP LB Backend Latency monitor"
  type        = "map"
  default     = {}
}

# Appended to the fixed tag list of the monitor.
variable "backend_latency_service_extra_tags" {
  description = "Extra tags for GCP LB Backend Latency monitor"
  type        = "list"
  default     = []
}
|
||||
|
||||
#
|
||||
# Latency Backend bucket
|
||||
#
|
||||
# Monitor-specific message; when left empty the monitor falls back to
# var.message through coalesce().
variable "backend_latency_bucket_message" {
  description = "Custom message for the GCP LB Backend Latency monitor"
  type        = "string"
  default     = ""
}

# Time aggregation function placed in front of the evaluation window in the
# monitor query, i.e. `<time_aggregator>(<timeframe>): ...`.
variable "backend_latency_bucket_time_aggregator" {
  description = "Time aggregator for the GCP LB Backend Latency monitor"
  type        = "string"
  default     = "min"
}

# Evaluation window of the monitor query.
variable "backend_latency_bucket_timeframe" {
  description = "Timeframe for the GCP LB Backend Latency monitor"
  type        = "string"
  default     = "last_10m"
}

# Warning threshold of the backend-bucket latency monitor.
variable "backend_latency_bucket_threshold_warning" {
  description = "Latency in milliseconds (warning threshold)"
  type        = "string"
  default     = 4000
}

# Used both in the query comparison and as the monitor's critical threshold.
variable "backend_latency_bucket_threshold_critical" {
  description = "Latency in milliseconds (critical threshold)"
  type        = "string"
  default     = 8000
}

# Passed unchanged to the monitor's `silenced` attribute.
variable "backend_latency_bucket_silenced" {
  description = "Groups to mute for GCP LB Backend Latency monitor"
  type        = "map"
  default     = {}
}

# Appended to the fixed tag list of the monitor.
variable "backend_latency_bucket_extra_tags" {
  description = "Extra tags for GCP LB Backend Latency monitor"
  type        = "list"
  default     = []
}
|
||||
|
||||
#
|
||||
# Request Count
|
||||
#
|
||||
# Monitor-specific message; when left empty the monitor falls back to
# var.message through coalesce().
variable "request_count_message" {
  description = "Custom message for the GCP LB Request Count monitor"
  type        = "string"
  default     = ""
}

# Time aggregation function used inside pct_change(), i.e.
# `pct_change(<time_aggregator>(<timeframe>),<timeshift>): ...`.
variable "request_count_time_aggregator" {
  description = "Time aggregator for the GCP LB Request Count monitor"
  type        = "string"
  default     = "sum"
}

# Evaluation window used inside pct_change().
variable "request_count_timeframe" {
  description = "Timeframe for the GCP LB Request Count monitor"
  type        = "string"
  default     = "last_5m"
}

# Second argument of pct_change() in the monitor query.
variable "request_count_timeshift" {
  description = "Timeshift for the GCP LB Request Count monitor"
  type        = "string"
  default     = "last_5m"
}

# Warning threshold on the percentage change of the request count.
variable "request_count_threshold_warning" {
  description = "Deviation in percentage (warning threshold)"
  type        = "string"
  default     = 250
}

# Used both in the query comparison and as the monitor's critical threshold.
variable "request_count_threshold_critical" {
  description = "Deviation in percentage (critical threshold)"
  type        = "string"
  default     = 500
}

# Passed unchanged to the monitor's `silenced` attribute.
variable "request_count_silenced" {
  description = "Groups to mute for GCP LB Request Count monitor"
  type        = "map"
  default     = {}
}

# Appended to the fixed tag list of the monitor.
variable "request_count_extra_tags" {
  description = "Extra tags for GCP LB Request Count monitor"
  type        = "list"
  default     = []
}
|
||||
181
cloud/gcp/lb/monitors-lb.tf
Normal file
181
cloud/gcp/lb/monitors-lb.tf
Normal file
@ -0,0 +1,181 @@
|
||||
#
|
||||
# 4XX Errors
|
||||
#
|
||||
# Alerts when the share of 4xx responses served by a GCP HTTP(S) load
# balancer, evaluated per forwarding rule, exceeds the critical threshold.
resource "datadog_monitor" "error_rate_4xx" {
  type = "metric alert"
  name = "[${var.environment}] GCP LB 4xx errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"

  # The monitor-specific message wins; coalesce() falls back to the
  # module-wide message when it is empty.
  message = "${coalesce(var.error_rate_4xx_message, var.message)}"

  # Percentage = 4xx requests / (all requests + artificial_request) * 100.
  # Both series are wrapped in default(..., 0) and grouped by forwarding rule.
  query = <<EOF
${var.error_rate_4xx_time_aggregator}(${var.error_rate_4xx_timeframe}):
default(sum:gcp.loadbalancing.https.request_count{${var.filter_tags},response_code_class:400} by {forwarding_rule_name}.as_count(), 0)
/ (default(sum:gcp.loadbalancing.https.request_count{${var.filter_tags}} by {forwarding_rule_name}.as_count(), 0)
+ ${var.error_rate_4xx_artificial_request}) * 100
> ${var.error_rate_4xx_threshold_critical}
EOF

  # Only a critical threshold is defined; it is the same value the query
  # compares against.
  thresholds {
    critical = "${var.error_rate_4xx_threshold_critical}"
  }

  # Evaluation timing shared by all monitors of this module.
  evaluation_delay = "${var.evaluation_delay}"
  new_host_delay   = "${var.new_host_delay}"

  # Notification and evaluation options.
  include_tags        = true
  locked              = false
  notify_audit        = false
  notify_no_data      = false
  renotify_interval   = 0
  require_full_window = false
  timeout_h           = 0

  silenced = "${var.error_rate_4xx_silenced}"

  # Fixed module tags followed by the caller-supplied extra tags.
  tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:lb", "team:claranet", "created-by:terraform", "${var.error_rate_4xx_extra_tags}"]
}
|
||||
|
||||
#
|
||||
# 5XX Errors
|
||||
#
|
||||
# Alerts when the share of 5xx responses served by a GCP HTTP(S) load
# balancer, evaluated per forwarding rule, exceeds the critical threshold.
resource "datadog_monitor" "error_rate_5xx" {
  name = "[${var.environment}] GCP LB 5xx errors {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"

  # The monitor-specific message wins; coalesce() falls back to the
  # module-wide message when it is empty.
  message = "${coalesce(var.error_rate_5xx_message, var.message)}"

  type = "metric alert"

  # Percentage = 5xx requests / (all requests + artificial_request) * 100.
  # Every interpolated setting is the error_rate_5xx_* variable. The query
  # previously used the error_rate_4xx_* variables, so the 5xx inputs were
  # ignored and the query threshold could differ from the `thresholds`
  # block below.
  query = <<EOF
${var.error_rate_5xx_time_aggregator}(${var.error_rate_5xx_timeframe}):
default(sum:gcp.loadbalancing.https.request_count{${var.filter_tags},response_code_class:500} by {forwarding_rule_name}.as_count(), 0)
/ (default(sum:gcp.loadbalancing.https.request_count{${var.filter_tags}} by {forwarding_rule_name}.as_count(), 0)
+ ${var.error_rate_5xx_artificial_request}) * 100
> ${var.error_rate_5xx_threshold_critical}
EOF

  # Same value as the one compared against in the query above.
  thresholds {
    critical = "${var.error_rate_5xx_threshold_critical}"
  }

  notify_audit        = false
  locked              = false
  timeout_h           = 0
  include_tags        = true
  require_full_window = false
  notify_no_data      = false
  renotify_interval   = 0

  # Evaluation timing shared by all monitors of this module.
  evaluation_delay = "${var.evaluation_delay}"
  new_host_delay   = "${var.new_host_delay}"

  silenced = "${var.error_rate_5xx_silenced}"

  # Fixed module tags followed by the caller-supplied extra tags.
  tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:lb", "team:claranet", "created-by:terraform", "${var.error_rate_5xx_extra_tags}"]
}
|
||||
|
||||
#
|
||||
# Backend Latency for service
|
||||
#
|
||||
# Alerts on the average backend latency reported for load-balancer targets
# of type backend_service, per backend target and forwarding rule.
resource "datadog_monitor" "backend_latency_service" {
  type = "metric alert"
  name = "[${var.environment}] GCP LB service backend latency {{#is_alert}}{{{comparator}}} {{threshold}}ms ({{value}}ms){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}ms ({{value}}ms){{/is_warning}}"

  # The monitor-specific message wins; coalesce() falls back to the
  # module-wide message when it is empty.
  message = "${coalesce(var.backend_latency_service_message, var.message)}"

  # Scope is restricted to backend_target_type:backend_service and compared
  # against the critical threshold.
  query = <<EOF
${var.backend_latency_service_time_aggregator}(${var.backend_latency_service_timeframe}):
min:gcp.loadbalancing.https.backend_latencies.avg{${var.filter_tags},backend_target_type:backend_service} by {backend_target_name,forwarding_rule_name}
> ${var.backend_latency_service_threshold_critical}
EOF

  thresholds {
    critical = "${var.backend_latency_service_threshold_critical}"
    warning  = "${var.backend_latency_service_threshold_warning}"
  }

  # Evaluation timing shared by all monitors of this module.
  evaluation_delay = "${var.evaluation_delay}"
  new_host_delay   = "${var.new_host_delay}"

  # Notification and evaluation options.
  include_tags        = true
  locked              = false
  notify_audit        = false
  notify_no_data      = false
  renotify_interval   = 0
  require_full_window = false
  timeout_h           = 0

  silenced = "${var.backend_latency_service_silenced}"

  # Fixed module tags followed by the caller-supplied extra tags.
  tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:lb", "team:claranet", "created-by:terraform", "${var.backend_latency_service_extra_tags}"]
}
|
||||
|
||||
#
|
||||
# Backend Latency for bucket
|
||||
#
|
||||
# Alerts on the average backend latency reported for load-balancer targets
# of type backend_bucket, per backend target and forwarding rule.
resource "datadog_monitor" "backend_latency_bucket" {
  type = "metric alert"
  name = "[${var.environment}] GCP LB bucket backend latency {{#is_alert}}{{{comparator}}} {{threshold}}ms ({{value}}ms){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}ms ({{value}}ms){{/is_warning}}"

  # The monitor-specific message wins; coalesce() falls back to the
  # module-wide message when it is empty.
  message = "${coalesce(var.backend_latency_bucket_message, var.message)}"

  # Scope is restricted to backend_target_type:backend_bucket and compared
  # against the critical threshold.
  query = <<EOF
${var.backend_latency_bucket_time_aggregator}(${var.backend_latency_bucket_timeframe}):
min:gcp.loadbalancing.https.backend_latencies.avg{${var.filter_tags},backend_target_type:backend_bucket} by {backend_target_name,forwarding_rule_name}
> ${var.backend_latency_bucket_threshold_critical}
EOF

  thresholds {
    critical = "${var.backend_latency_bucket_threshold_critical}"
    warning  = "${var.backend_latency_bucket_threshold_warning}"
  }

  # Evaluation timing shared by all monitors of this module.
  evaluation_delay = "${var.evaluation_delay}"
  new_host_delay   = "${var.new_host_delay}"

  # Notification and evaluation options.
  include_tags        = true
  locked              = false
  notify_audit        = false
  notify_no_data      = false
  renotify_interval   = 0
  require_full_window = false
  timeout_h           = 0

  silenced = "${var.backend_latency_bucket_silenced}"

  # Fixed module tags followed by the caller-supplied extra tags.
  tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:lb", "team:claranet", "created-by:terraform", "${var.backend_latency_bucket_extra_tags}"]
}
|
||||
|
||||
#
|
||||
# Request Count
|
||||
#
|
||||
# Alerts when the load-balancer request count, per forwarding rule, changes
# by more than the configured percentage (pct_change query).
resource "datadog_monitor" "request_count" {
  type = "query alert"
  name = "[${var.environment}] GCP LB Requests count increased abruptly {{#is_alert}}{{value}}%{{/is_alert}}{{#is_warning}}{{value}}%{{/is_warning}}"

  # The monitor-specific message wins; coalesce() falls back to the
  # module-wide message when it is empty.
  message = "${coalesce(var.request_count_message, var.message)}"

  # pct_change(<aggregator>(<timeframe>),<timeshift>) over the request count,
  # wrapped in default(..., 0) and compared against the critical threshold.
  query = <<EOF
pct_change(${var.request_count_time_aggregator}(${var.request_count_timeframe}),${var.request_count_timeshift}):
default(sum:gcp.loadbalancing.https.request_count{${var.filter_tags}} by {forwarding_rule_name}.as_count(), 0)
> ${var.request_count_threshold_critical}
EOF

  thresholds {
    critical = "${var.request_count_threshold_critical}"
    warning  = "${var.request_count_threshold_warning}"
  }

  # Evaluation timing shared by all monitors of this module.
  evaluation_delay = "${var.evaluation_delay}"
  new_host_delay   = "${var.new_host_delay}"

  # Notification and evaluation options.
  include_tags        = true
  locked              = false
  notify_audit        = false
  notify_no_data      = false
  renotify_interval   = 0
  require_full_window = false
  timeout_h           = 0

  silenced = "${var.request_count_silenced}"

  # Fixed module tags followed by the caller-supplied extra tags.
  tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:lb", "team:claranet", "created-by:terraform", "${var.request_count_extra_tags}"]
}
|
||||
24
cloud/gcp/lb/outputs.tf
Normal file
24
cloud/gcp/lb/outputs.tf
Normal file
@ -0,0 +1,24 @@
|
||||
# Each output exposes the id of one monitor through the splat form
# `datadog_monitor.<name>.*.id`.

# 4xx error-rate monitor.
output "error_rate_4xx_id" {
  value       = "${datadog_monitor.error_rate_4xx.*.id}"
  description = "id for monitor error_rate_4xx"
}

# 5xx error-rate monitor.
output "error_rate_5xx_id" {
  value       = "${datadog_monitor.error_rate_5xx.*.id}"
  description = "id for monitor error_rate_5xx"
}

# Backend-service latency monitor.
output "backend_latency_service_id" {
  value       = "${datadog_monitor.backend_latency_service.*.id}"
  description = "id for monitor backend_latency_service"
}

# Backend-bucket latency monitor.
output "backend_latency_bucket_id" {
  value       = "${datadog_monitor.backend_latency_bucket.*.id}"
  description = "id for monitor backend_latency_bucket"
}

# Request-count change monitor.
output "request_count_id" {
  value       = "${datadog_monitor.request_count.*.id}"
  description = "id for monitor request_count"
}
|
||||
Loading…
x
Reference in New Issue
Block a user