MON-226 Basic monitors

This commit is contained in:
Rafael Romero Carmona 2018-08-01 14:40:24 +02:00 committed by Quentin Manfroi
parent e6fc224d73
commit 687b04876d
2 changed files with 217 additions and 0 deletions

118
cloud/gcp/pubsub/inputs.tf Normal file
View File

@ -0,0 +1,118 @@
#
# Datadog global variables
#
# Environment name; rendered in every monitor title prefix and in the "env:" tag.
variable "environment" {
  type        = "string"
  description = "Architecture environment"
}
# String flag (not a boolean): the filter template compares it to the literal "true".
variable "filter_tags_use_defaults" {
  default     = "true"
  description = "Use default filter tags convention"
}
# Raw filter expression injected into the monitor queries when the default
# project_id-based filter is disabled.
variable "filter_tags_custom" {
  default     = "*"
  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
}
# Required (no default). Used as the fallback notification message whenever a
# monitor-specific *_message variable is left empty.
variable "message" {
  description = "Message sent when a monitor is triggered"
}
# Fed to both evaluation_delay and new_host_delay on every monitor.
variable "delay" {
  default     = 900
  description = "Delay in seconds for the metric evaluation"
}
#
# Filter variables
#
# Formatted as "project_id:<value>" to build the default query filter.
variable "project_id" {
  description = "ID of the GCP Project"
  type        = "string"
}
#
# Sending Operations Count
#
# Left empty by default so that coalesce() in the monitor falls back to var.message.
variable "sending_operations_count_message" {
  type        = "string"
  default     = ""
  description = "Custom message for the GCP Pub/Sub Sending Operations Count monitor"
}
# Aggregation function applied over the evaluation window; it is interpolated as
# the leading function of the monitor query, i.e. "<time_aggregator>(<timeframe>):".
# The description previously said "Timeframe" (copy-pasted from the timeframe
# variable), which mislabelled this input in generated documentation.
variable "sending_operations_count_time_aggregator" {
  description = "Time aggregator for the GCP Pub/Sub Sending Operations Count monitor (e.g. min, max, avg or sum)"
  type        = "string"
  default     = "sum"
}
# Evaluation window; interpolated inside the parentheses of the query's time aggregator.
variable "sending_operations_count_timeframe" {
  type        = "string"
  default     = "last_30m"
  description = "Timeframe for the GCP Pub/Sub Sending Operations Count monitor"
}
# The monitor alerts when the aggregated send-operation count is <= this value.
variable "sending_operations_count_threshold_critical" {
  type        = "string"
  default     = 0
  description = "Critical threshold"
}
# Passed straight through to the monitor's "silenced" attribute.
variable "sending_operations_count_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for GCP Pub/Sub Sending Operations Count monitor"
}
# Appended after the fixed tags on the monitor.
variable "sending_operations_count_extra_tags" {
  type        = "list"
  default     = []
  description = "Extra tags for GCP Pub/Sub Sending Operations Count monitor"
}
#
# Unavailable Sending Operations Count
#
# Left empty by default so that coalesce() in the monitor falls back to var.message.
variable "unavailable_sending_operations_count_message" {
  type        = "string"
  default     = ""
  description = "Custom message for the GCP Pub/Sub Unavailable Sending Operations Count monitor"
}
# Aggregation function applied over the evaluation window; it is interpolated as
# the leading function of the monitor query, i.e. "<time_aggregator>(<timeframe>):".
# The description previously said "Timeframe" (copy-pasted from the timeframe
# variable), which mislabelled this input in generated documentation.
variable "unavailable_sending_operations_count_time_aggregator" {
  description = "Time aggregator for the GCP Pub/Sub Unavailable Sending Operations Count monitor (e.g. min, max, avg or sum)"
  type        = "string"
  default     = "sum"
}
# Evaluation window; interpolated inside the parentheses of the query's time aggregator.
variable "unavailable_sending_operations_count_timeframe" {
  type        = "string"
  default     = "last_10m"
  description = "Timeframe for the GCP Pub/Sub Unavailable Sending Operations Count monitor"
}
# Warning level for the count of send operations answered with response_code:unavailable.
variable "unavailable_sending_operations_count_threshold_warning" {
  type        = "string"
  default     = 2
  description = "Warning threshold"
}
# The monitor alerts when the aggregated "unavailable" count is >= this value.
variable "unavailable_sending_operations_count_threshold_critical" {
  type        = "string"
  default     = 4
  description = "Critical threshold"
}
# Passed straight through to the monitor's "silenced" attribute.
variable "unavailable_sending_operations_count_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for GCP Pub/Sub Unavailable Sending Operations Count monitor"
}
# Appended after the fixed tags on the monitor.
variable "unavailable_sending_operations_count_extra_tags" {
  type        = "list"
  default     = []
  description = "Extra tags for GCP Pub/Sub Unavailable Sending Operations Count monitor"
}

View File

@ -0,0 +1,99 @@
#
# FILTER
#
# Renders the tag filter shared by every monitor query in this module:
#   - "project_id:<var.project_id>" when filter_tags_use_defaults is the string "true"
#   - var.filter_tags_custom verbatim otherwise
# "$${filter}" is an escaped interpolation, so the template itself is the
# literal ${filter} placeholder resolved from `vars` at render time.
data "template_file" "filter" {
  template = "$${filter}"

  vars {
    filter = "${var.filter_tags_use_defaults == "true" ? format("project_id:%s", var.project_id) : var.filter_tags_custom}"
  }
}
#
# Sending Operations Count
#
# Alerts, per topic_id, when the count of Pub/Sub send-message operations over
# the configured timeframe is at or below the critical threshold.
#
# Only a critical threshold is defined, so the title carries just the
# {{#is_alert}} section; the former {{#is_warning}}/{{warn_threshold}} section
# could never render and referred to a threshold this monitor does not have.
resource "datadog_monitor" "sending_operations_count" {
  name = "[${var.environment}] GCP pubsub sending messages operations {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}"

  # Monitor-specific message wins; an empty one falls back to the global message.
  message = "${coalesce(var.sending_operations_count_message, var.message)}"
  type    = "query alert"

  # default(..., 0) substitutes 0 for missing points before the comparison.
  # Heredoc body is kept flush-left so the query text sent to Datadog is unchanged.
  query = <<EOF
${var.sending_operations_count_time_aggregator}(${var.sending_operations_count_timeframe}):
default(avg:gcp.pubsub.topic.send_message_operation_count{${data.template_file.filter.rendered}} by {topic_id}.as_count(), 0)
<= ${var.sending_operations_count_threshold_critical}
EOF

  # Must match the value compared against in the query above.
  thresholds {
    critical = "${var.sending_operations_count_threshold_critical}"
  }

  notify_audit        = false
  locked              = false
  timeout_h           = 0
  include_tags        = true
  no_data_timeframe   = 45
  require_full_window = false
  notify_no_data      = true
  renotify_interval   = 0

  evaluation_delay = "${var.delay}"
  new_host_delay   = "${var.delay}"

  silenced = "${var.sending_operations_count_silenced}"

  # Fixed module tags followed by the caller-supplied extra tags (a list
  # variable interpolated as the last element, Terraform 0.11 style).
  tags = [
    "team:gcp",
    "provider:gcp",
    "resource:pubsub",
    "env:${var.environment}",
    "created_by:terraform",
    "${var.sending_operations_count_extra_tags}",
  ]
}
#
# Unavailable Sending Operations Count
#
# Warns/alerts, per topic_id, when the count of Pub/Sub send-message operations
# tagged response_code:unavailable over the configured timeframe reaches the
# warning/critical thresholds.
resource "datadog_monitor" "unavailable_sending_operations_count" {
  type = "query alert"
  name = "[${var.environment}] GCP pubsub sending messages with result unavailable {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}"

  # Monitor-specific message wins; an empty one falls back to the global message.
  message = "${coalesce(var.unavailable_sending_operations_count_message, var.message)}"

  # The shared filter is combined with response_code:unavailable.
  # NOTE(review): with filter_tags_custom left at "*" this renders
  # "{*,response_code:unavailable}" — confirm Datadog accepts that scope.
  # Heredoc body is kept flush-left so the query text sent to Datadog is unchanged.
  query = <<EOF
${var.unavailable_sending_operations_count_time_aggregator}(${var.unavailable_sending_operations_count_timeframe}):
default(avg:gcp.pubsub.topic.send_message_operation_count{${data.template_file.filter.rendered},response_code:unavailable} by {topic_id}.as_count(), 0)
>= ${var.unavailable_sending_operations_count_threshold_critical}
EOF

  # The critical value must match the one compared against in the query above.
  thresholds {
    critical = "${var.unavailable_sending_operations_count_threshold_critical}"
    warning  = "${var.unavailable_sending_operations_count_threshold_warning}"
  }

  # Evaluation timing.
  evaluation_delay    = "${var.delay}"
  new_host_delay      = "${var.delay}"
  require_full_window = false

  # No-data handling.
  notify_no_data    = true
  no_data_timeframe = 25

  # Notification behaviour.
  renotify_interval = 0
  timeout_h         = 0
  include_tags      = true
  notify_audit      = false
  locked            = false

  silenced = "${var.unavailable_sending_operations_count_silenced}"

  # Fixed module tags followed by the caller-supplied extra tags (a list
  # variable interpolated as the last element, Terraform 0.11 style).
  tags = [
    "team:gcp",
    "provider:gcp",
    "resource:pubsub",
    "env:${var.environment}",
    "created_by:terraform",
    "${var.unavailable_sending_operations_count_extra_tags}",
  ]
}