From a2ac8989bf9c7c951b7645adf9b1e44c1827feb6 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Tue, 20 Aug 2019 12:02:37 +0100 Subject: [PATCH] MON-400 monitors for pubsub topics and subscriptions --- README.md | 2 + cloud/gcp/pubsub/monitors-pubsub.tf | 73 ------- cloud/gcp/pubsub/subscription/README.md | 72 ++++++ cloud/gcp/pubsub/subscription/inputs.tf | 206 ++++++++++++++++++ .../subscription/monitors-subscription.tf | 128 +++++++++++ cloud/gcp/pubsub/subscription/outputs.tf | 15 ++ cloud/gcp/pubsub/{ => topic}/README.md | 19 +- cloud/gcp/pubsub/{ => topic}/inputs.tf | 44 ++++ cloud/gcp/pubsub/topic/monitors-topics.tf | 111 ++++++++++ cloud/gcp/pubsub/{ => topic}/outputs.tf | 5 + 10 files changed, 597 insertions(+), 78 deletions(-) delete mode 100644 cloud/gcp/pubsub/monitors-pubsub.tf create mode 100644 cloud/gcp/pubsub/subscription/README.md create mode 100644 cloud/gcp/pubsub/subscription/inputs.tf create mode 100644 cloud/gcp/pubsub/subscription/monitors-subscription.tf create mode 100644 cloud/gcp/pubsub/subscription/outputs.tf rename cloud/gcp/pubsub/{ => topic}/README.md (67%) rename cloud/gcp/pubsub/{ => topic}/inputs.tf (68%) create mode 100644 cloud/gcp/pubsub/topic/monitors-topics.tf rename cloud/gcp/pubsub/{ => topic}/outputs.tf (64%) diff --git a/README.md b/README.md index 64c5bf8..ab56880 100644 --- a/README.md +++ b/README.md @@ -184,6 +184,8 @@ The `//` is very important, it's a terraform specific syntax used to separate gi - [instance](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/cloud/gcp/gce/instance/) - [lb](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/cloud/gcp/lb/) - [pubsub](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/cloud/gcp/pubsub/) + - 
[subscription](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/cloud/gcp/pubsub/subscription/) + - [topic](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/cloud/gcp/pubsub/topic/) - [common](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/common/) - [alerting-message](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/common/alerting-message/) - [filter-tags](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/common/filter-tags/) diff --git a/cloud/gcp/pubsub/monitors-pubsub.tf b/cloud/gcp/pubsub/monitors-pubsub.tf deleted file mode 100644 index 90ea6fd..0000000 --- a/cloud/gcp/pubsub/monitors-pubsub.tf +++ /dev/null @@ -1,73 +0,0 @@ -# -# Sending Operations Count -# -resource "datadog_monitor" "sending_operations_count" { - count = var.sending_operations_count_enabled == "true" ? 1 : 0 - name = "${var.prefix_slug == "" ? 
"" : "[${var.prefix_slug}]"}[${var.environment}] GCP pubsub sending messages operations {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" - message = coalesce(var.sending_operations_count_message, var.message) - type = "query alert" - - query = <= ${var.unavailable_sending_operations_count_threshold_critical} -EOQ - - thresholds = { - warning = var.unavailable_sending_operations_count_threshold_warning - critical = var.unavailable_sending_operations_count_threshold_critical - } - - evaluation_delay = var.evaluation_delay - new_host_delay = var.new_host_delay - notify_audit = false - locked = false - timeout_h = 0 - include_tags = true - require_full_window = false - notify_no_data = false - renotify_interval = 0 - - tags = concat(["env:${var.environment}", "type:cloud", "provider:gcp", "resource:pubsub", "team:claranet", "created-by:terraform"], var.unavailable_sending_operations_count_extra_tags) - - lifecycle { - ignore_changes = ["silenced"] - } -} - diff --git a/cloud/gcp/pubsub/subscription/README.md b/cloud/gcp/pubsub/subscription/README.md new file mode 100644 index 0000000..14e8476 --- /dev/null +++ b/cloud/gcp/pubsub/subscription/README.md @@ -0,0 +1,72 @@ +# CLOUD GCP PUBSUB SUBSCRIPTION DataDog monitors + +## How to use this module + +``` +module "datadog-monitors-cloud-gcp-pubsub-subscription" { + source = "git::ssh://git@git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors.git//cloud/gcp/pubsub/subscription?ref={revision}" + + environment = var.environment + message = module.datadog-message-alerting.alerting-message +} + +``` + +## Purpose + +Creates DataDog monitors with the following checks: + +- GCP Pub/Sub Subscription Anomaly latency on push endpoint (disabled by default) +- GCP Pub/Sub Subscription Latency on push endpoint +- GCP Pub/Sub Subscription Oldest unacked message is + +## Inputs + +| Name | Description | Type | Default 
| Required | +|------|-------------|:----:|:-----:|:-----:| +| environment | Architecture environment | string | n/a | yes | +| evaluation\_delay | Delay in seconds for the metric evaluation | string | `"900"` | no | +| filter\_tags | Tags used for filtering | string | `"*"` | no | +| message | Message sent when a monitor is triggered | string | n/a | yes | +| new\_host\_delay | Delay in seconds for the new host evaluation | string | `"300"` | no | +| oldest\_unacked\_message\_age\_enabled | Flag to enable GCP Pub/Sub Subscription Oldest Unacked Message Age monitor | string | `"true"` | no | +| oldest\_unacked\_message\_age\_extra\_tags | Extra tags for GCP Pub/Sub Subscription Oldest Unacked Message Age monitor | list(string) | `[]` | no | +| oldest\_unacked\_message\_age\_message | Custom message for the GCP Pub/Sub Subscription Oldest Unacked Message Age monitor | string | `""` | no | +| oldest\_unacked\_message\_age\_threshold\_critical | GCP Pub/Sub Subscription Oldest Unacked Message Age critical threshold | string | `"120"` | no | +| oldest\_unacked\_message\_age\_threshold\_warning | GCP Pub/Sub Subscription Oldest Unacked Message Age warning threshold | string | `"30"` | no | +| oldest\_unacked\_message\_age\_time\_aggregator | Time aggregator for the GCP Pub/Sub Subscription Oldest Unacked Message Age monitor | string | `"min"` | no | +| oldest\_unacked\_message\_age\_timeframe | Timeframe for the GCP Pub/Sub Subscription Oldest Unacked Message Age monitor | string | `"last_5m"` | no | +| prefix\_slug | Prefix string to prepend between brackets on every monitors names | string | `""` | no | +| subscription\_push\_latency\_anomaly\_alert\_window | Alert window. | string | `"last_15m"` | no | +| subscription\_push\_latency\_anomaly\_count\_default\_zero | Count default zero. 
| string | `"true"` | no | +| subscription\_push\_latency\_anomaly\_detection\_algorithm | Anomaly Detection Algorithm used | string | `"basic"` | no | +| subscription\_push\_latency\_anomaly\_direction | Direction of the anomaly. It can be both, below or above. | string | `"above"` | no | +| subscription\_push\_latency\_anomaly\_enabled | Flag to enable GCP Pub/Sub Subscription Push Latency Anomaly monitor | string | `"false"` | no | +| subscription\_push\_latency\_anomaly\_extra\_tags | Extra tags for GCP Pub/Sub Subscription Push Latency Anomaly monitor | list(string) | `[]` | no | +| subscription\_push\_latency\_anomaly\_interval | Interval. | string | `"60"` | no | +| subscription\_push\_latency\_anomaly\_message | Custom message for the GCP Pub/Sub Subscription Push Latency Anomaly monitor | string | `""` | no | +| subscription\_push\_latency\_anomaly\_seasonality | Seasonality of the algorithm | string | `"daily"` | no | +| subscription\_push\_latency\_anomaly\_threshold\_critical | GCP Pub/Sub Subscription Push Latency Anomaly critical threshold | string | `"2"` | no | +| subscription\_push\_latency\_anomaly\_threshold\_warning | GCP Pub/Sub Subscription Push Latency Anomaly warning threshold | string | `"1"` | no | +| subscription\_push\_latency\_anomaly\_time\_aggregator | Time aggregator for the GCP Pub/Sub Subscription Push Latency Anomaly monitor | string | `"avg"` | no | +| subscription\_push\_latency\_anomaly\_timeframe | Timeframe for the GCP Pub/Sub Subscription Push Latency Anomaly monitor | string | `"last_10m"` | no | +| subscription\_push\_latency\_enabled | Flag to enable GCP Pub/Sub Subscription Push Latency High monitor | string | `"true"` | no | +| subscription\_push\_latency\_extra\_tags | Extra tags for GCP Pub/Sub Subscription Push Latency High monitor | list(string) | `[]` | no | +| subscription\_push\_latency\_message | Custom message for the GCP Pub/Sub Subscription Push Latency High monitor | string | `""` | no | +| 
subscription\_push\_latency\_threshold\_critical | GCP Pub/Sub Subscription Push Latency High critical threshold | string | `"5000"` | no | +| subscription\_push\_latency\_threshold\_warning | GCP Pub/Sub Subscription Push Latency High warning threshold | string | `"1000"` | no | +| subscription\_push\_latency\_time\_aggregator | Time aggregator for the GCP Pub/Sub Subscription Push Latency High monitor | string | `"avg"` | no | +| subscription\_push\_latency\_timeframe | Timeframe for the GCP Pub/Sub Subscription Push Latency High monitor | string | `"last_10m"` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| oldest\_unacked\_message\_age\_id | id for monitor oldest_unacked_message_age | +| subscription\_push\_latency\_anomaly\_id | id for monitor subscription_push_latency_anomaly | +| subscription\_push\_latency\_id | id for monitor subscription_push_latency | + +## Related documentation + +* [GCP Pub/Sub Metrics](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-pubsub) +* [Datadog GCP Pub/Sub integration](https://docs.datadoghq.com/integrations/google_cloud_pubsub/) diff --git a/cloud/gcp/pubsub/subscription/inputs.tf b/cloud/gcp/pubsub/subscription/inputs.tf new file mode 100644 index 0000000..7227a64 --- /dev/null +++ b/cloud/gcp/pubsub/subscription/inputs.tf @@ -0,0 +1,206 @@ +# +# Datadog global variables +# +variable "environment" { + description = "Architecture environment" + type = string +} + +variable "filter_tags" { + description = "Tags used for filtering" + default = "*" +} + +variable "message" { + description = "Message sent when a monitor is triggered" +} + +variable "evaluation_delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "new_host_delay" { + description = "Delay in seconds for the new host evaluation" + default = 300 +} + +variable "prefix_slug" { + description = "Prefix string to prepend between brackets on every monitors names" + default = "" +} + +# +# 
oldest_unacked_message_age +# + +variable "oldest_unacked_message_age_enabled" { + description = "Flag to enable GCP Pub/Sub Subscription Oldest Unacked Message Age monitor" + type = string + default = "true" +} + +variable "oldest_unacked_message_age_message" { + description = "Custom message for the GCP Pub/Sub Subscription Oldest Unacked Message Age monitor" + type = string + default = "" +} + +variable "oldest_unacked_message_age_time_aggregator" { + description = "Time aggregator for the GCP Pub/Sub Subscription Oldest Unacked Message Age monitor" + type = string + default = "min" +} + +variable "oldest_unacked_message_age_timeframe" { + description = "Timeframe for the GCP Pub/Sub Subscription Oldest Unacked Message Age monitor" + type = string + default = "last_5m" +} + +variable "oldest_unacked_message_age_threshold_warning" { + description = "GCP Pub/Sub Subscription Oldest Unacked Message Age warning threshold" + type = string + default = 30 +} + +variable "oldest_unacked_message_age_threshold_critical" { + description = "GCP Pub/Sub Subscription Oldest Unacked Message Age critical threshold" + type = string + default = 120 +} + +variable "oldest_unacked_message_age_extra_tags" { + description = "Extra tags for GCP Pub/Sub Subscription Oldest Unacked Message Age monitor" + type = list(string) + default = [] +} + +# +# subscription_push_latency +# +variable "subscription_push_latency_enabled" { + description = "Flag to enable GCP Pub/Sub Subscription Push Latency High monitor" + type = string + default = "true" +} + +variable "subscription_push_latency_message" { + description = "Custom message for the GCP Pub/Sub Subscription Push Latency High monitor" + type = string + default = "" +} + +variable "subscription_push_latency_time_aggregator" { + description = "Time aggregator for the GCP Pub/Sub Subscription Push Latency High monitor" + type = string + default = "avg" +} + +variable "subscription_push_latency_timeframe" { + description = "Timeframe for the 
GCP Pub/Sub Subscription Push Latency High monitor" + type = string + default = "last_10m" +} + +variable "subscription_push_latency_threshold_warning" { + description = "GCP Pub/Sub Subscription Push Latency High warning threshold" + type = string + default = 1000 +} + +variable "subscription_push_latency_threshold_critical" { + description = "GCP Pub/Sub Subscription Push Latency High critical threshold" + type = string + default = 5000 +} + +variable "subscription_push_latency_extra_tags" { + description = "Extra tags for GCP Pub/Sub Subscription Push Latency High monitor" + type = list(string) + default = [] +} + +# +# subscription_push_latency_anomaly +# +variable "subscription_push_latency_anomaly_enabled" { + description = "Flag to enable GCP Pub/Sub Subscription Push Latency Anomaly monitor" + type = string + default = "false" +} + +variable "subscription_push_latency_anomaly_message" { + description = "Custom message for the GCP Pub/Sub Subscription Push Latency Anomaly monitor" + type = string + default = "" +} + +variable "subscription_push_latency_anomaly_time_aggregator" { + description = "Time aggregator for the GCP Pub/Sub Subscription Push Latency Anomaly monitor" + type = string + default = "avg" +} + +variable "subscription_push_latency_anomaly_timeframe" { + description = "Timeframe for the GCP Pub/Sub Subscription Push Latency Anomaly monitor" + type = string + default = "last_10m" +} + +variable "subscription_push_latency_anomaly_detection_algorithm" { + description = "Anomaly Detection Algorithm used" + type = string + default = "basic" +} + +variable "subscription_push_latency_anomaly_direction" { + description = "Direction of the anomaly. It can be both, below or above." + type = string + default = "above" +} + +variable "subscription_push_latency_anomaly_alert_window" { + description = "Alert window." + type = string + default = "last_15m" +} + +variable "subscription_push_latency_anomaly_interval" { + description = "Interval." 
+ type = string + default = 60 +} + +variable "subscription_push_latency_anomaly_count_default_zero" { + description = "Count default zero." + type = string + default = "true" +} + +variable "subscription_push_latency_anomaly_seasonality" { + description = "Seasonality of the algorithm" + type = string + default = "daily" +} + +variable "subscription_push_latency_anomaly_threshold_warning" { + description = "GCP Pub/Sub Subscription Push Latency Anomaly warning threshold" + type = string + default = 1 +} + +variable "subscription_push_latency_anomaly_threshold_critical" { + description = "GCP Pub/Sub Subscription Push Latency Anomaly critical threshold" + type = string + default = 2 +} + +variable "subscription_push_latency_anomaly_extra_tags" { + description = "Extra tags for GCP Pub/Sub Subscription Push Latency Anomaly monitor" + type = list(string) + default = [] +} + + + diff --git a/cloud/gcp/pubsub/subscription/monitors-subscription.tf b/cloud/gcp/pubsub/subscription/monitors-subscription.tf new file mode 100644 index 0000000..bc9e7b8 --- /dev/null +++ b/cloud/gcp/pubsub/subscription/monitors-subscription.tf @@ -0,0 +1,128 @@ +###################### +# All Subscriptions # +###################### + +# +# oldest_unacked_message_age +# +resource "datadog_monitor" "oldest_unacked_message_age" { + count = var.oldest_unacked_message_age_enabled == "true" ? 1 : 0 + name = "${var.prefix_slug == "" ? 
"" : "[${var.prefix_slug}]"}[${var.environment}] GCP Pub/Sub Subscription Oldest unacked message is {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}s old" + message = coalesce(var.oldest_unacked_message_age_message, var.message) + type = "query alert" + + query = <= ${var.oldest_unacked_message_age_threshold_critical} +EOQ + + thresholds = { + warning = var.oldest_unacked_message_age_threshold_warning + critical = var.oldest_unacked_message_age_threshold_critical + } + + evaluation_delay = var.evaluation_delay + new_host_delay = var.new_host_delay + notify_audit = false + locked = false + timeout_h = 0 + include_tags = true + require_full_window = false + notify_no_data = false + renotify_interval = 0 + + tags = concat(["env:${var.environment}", "type:cloud", "provider:gcp", "resource:pubsub", "pubsub_category:subscription", "team:claranet", "created-by:terraform"], var.oldest_unacked_message_age_extra_tags) + + lifecycle { + ignore_changes = ["silenced"] + } +} + +###################### +# Push Subscriptions # +###################### + +# +# subscription_push_latency +# +resource "datadog_monitor" "subscription_push_latency" { + count = var.subscription_push_latency_enabled == "true" ? 1 : 0 + name = "${var.prefix_slug == "" ? 
"" : "[${var.prefix_slug}]"}[${var.environment}] GCP Pub/Sub Subscription Latency on push endpoint {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = coalesce(var.subscription_push_latency_message, var.message) + type = "query alert" + + query = <= ${var.subscription_push_latency_threshold_critical} +EOQ + + thresholds = { + warning = var.subscription_push_latency_threshold_warning + critical = var.subscription_push_latency_threshold_critical + } + + evaluation_delay = var.evaluation_delay + new_host_delay = var.new_host_delay + notify_audit = false + locked = false + timeout_h = 0 + include_tags = true + require_full_window = false + notify_no_data = false + renotify_interval = 0 + + tags = concat(["env:${var.environment}", "type:cloud", "provider:gcp", "resource:pubsub", "pubsub_category:subscription", "team:claranet", "created-by:terraform"], var.subscription_push_latency_extra_tags) + + lifecycle { + ignore_changes = ["silenced"] + } +} + +# +# subscription_push_latency_anomaly +# +resource "datadog_monitor" "subscription_push_latency_anomaly" { + count = var.subscription_push_latency_anomaly_enabled == "true" ? 1 : 0 + name = "${var.prefix_slug == "" ? 
"" : "[${var.prefix_slug}]"}[${var.environment}] GCP Pub/Sub Subscription Anomaly latency on push endpoint {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = coalesce(var.subscription_push_latency_anomaly_message, var.message) + type = "query alert" + + query = <= ${var.subscription_push_latency_anomaly_threshold_critical} +EOQ + + thresholds = { + warning = var.subscription_push_latency_anomaly_threshold_warning + critical = var.subscription_push_latency_anomaly_threshold_critical + } + + evaluation_delay = var.evaluation_delay + new_host_delay = var.new_host_delay + notify_audit = false + locked = false + timeout_h = 0 + include_tags = true + require_full_window = false + notify_no_data = false + renotify_interval = 0 + + tags = concat(["env:${var.environment}", "type:cloud", "provider:gcp", "resource:pubsub", "pubsub_category:subscription", "team:claranet", "created-by:terraform"], var.subscription_push_latency_anomaly_extra_tags) + + lifecycle { + ignore_changes = ["silenced"] + } +} \ No newline at end of file diff --git a/cloud/gcp/pubsub/subscription/outputs.tf b/cloud/gcp/pubsub/subscription/outputs.tf new file mode 100644 index 0000000..13883e2 --- /dev/null +++ b/cloud/gcp/pubsub/subscription/outputs.tf @@ -0,0 +1,15 @@ +output "oldest_unacked_message_age_id" { + description = "id for monitor oldest_unacked_message_age" + value = datadog_monitor.oldest_unacked_message_age.*.id +} + +output "subscription_push_latency_id" { + description = "id for monitor subscription_push_latency" + value = datadog_monitor.subscription_push_latency.*.id +} + +output "subscription_push_latency_anomaly_id" { + description = "id for monitor subscription_push_latency_anomaly" + value = datadog_monitor.subscription_push_latency_anomaly.*.id +} + diff --git a/cloud/gcp/pubsub/README.md b/cloud/gcp/pubsub/topic/README.md similarity index 67% rename from 
cloud/gcp/pubsub/README.md rename to cloud/gcp/pubsub/topic/README.md index a896b88..9e31d84 100644 --- a/cloud/gcp/pubsub/README.md +++ b/cloud/gcp/pubsub/topic/README.md @@ -1,10 +1,10 @@ -# CLOUD GCP PUBSUB DataDog monitors +# CLOUD GCP PUBSUB TOPIC DataDog monitors ## How to use this module ``` -module "datadog-monitors-cloud-gcp-pubsub" { - source = "git::ssh://git@git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors.git//cloud/gcp/pubsub?ref={revision}" +module "datadog-monitors-cloud-gcp-pubsub-topic" { + source = "git::ssh://git@git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors.git//cloud/gcp/pubsub/topic?ref={revision}" environment = var.environment message = module.datadog-message-alerting.alerting-message @@ -16,8 +16,9 @@ module "datadog-monitors-cloud-gcp-pubsub" { Creates DataDog monitors with the following checks: -- GCP pubsub sending messages operations -- GCP pubsub sending messages with result unavailable +- GCP Pub/Sub Topic ratio of sending messages with result unavailable +- GCP Pub/Sub Topic sending messages operations +- GCP Pub/Sub Topic sending messages with result unavailable ## Inputs @@ -42,6 +43,13 @@ Creates DataDog monitors with the following checks: | unavailable\_sending\_operations\_count\_threshold\_warning | Warning threshold for the number of unavailable sending operations | string | `"2"` | no | | unavailable\_sending\_operations\_count\_time\_aggregator | Timeframe for the GCP Pub/Sub Unavailable Sending Operations Count monitor | string | `"sum"` | no | | unavailable\_sending\_operations\_count\_timeframe | Timeframe for the GCP Pub/Sub Unavailable Sending Operations Count monitor | string | `"last_10m"` | no | +| unavailable\_sending\_operations\_ratio\_enabled | Flag to enable GCP Pub/Sub Unavailable Sending Operations Ratio monitor | string | `"true"` | no | +| unavailable\_sending\_operations\_ratio\_extra\_tags | Extra tags for GCP Pub/Sub Unavailable Sending Operations 
Ratio monitor | list(string) | `[]` | no | +| unavailable\_sending\_operations\_ratio\_message | Custom message for the GCP Pub/Sub Unavailable Sending Operations Ratio monitor | string | `""` | no | +| unavailable\_sending\_operations\_ratio\_threshold\_critical | Critical threshold (%) for the ratio of unavailable sending operations | string | `"20"` | no | +| unavailable\_sending\_operations\_ratio\_threshold\_warning | Warning threshold (%) for the ratio of unavailable sending operations | string | `"10"` | no | +| unavailable\_sending\_operations\_ratio\_time\_aggregator | Time aggregator for the GCP Pub/Sub Unavailable Sending Operations Ratio monitor | string | `"sum"` | no | +| unavailable\_sending\_operations\_ratio\_timeframe | Timeframe for the GCP Pub/Sub Unavailable Sending Operations Ratio monitor | string | `"last_10m"` | no | ## Outputs @@ -49,6 +57,7 @@ Creates DataDog monitors with the following checks: |------|-------------| | sending\_operations\_count\_id | id for monitor sending_operations_count | | unavailable\_sending\_operations\_count\_id | id for monitor unavailable_sending_operations_count | +| unavailable\_sending\_operations\_ratio\_id | id for monitor unavailable_sending_operations_ratio | ## Related documentation diff --git a/cloud/gcp/pubsub/inputs.tf b/cloud/gcp/pubsub/topic/inputs.tf similarity index 68% rename from cloud/gcp/pubsub/inputs.tf rename to cloud/gcp/pubsub/topic/inputs.tf index 8c55a19..5908214 100644 --- a/cloud/gcp/pubsub/inputs.tf +++ b/cloud/gcp/pubsub/topic/inputs.tf @@ -114,3 +114,47 @@ variable "unavailable_sending_operations_count_extra_tags" { default = [] } +# +# Unavailable Sending Operations Ratio +# +variable "unavailable_sending_operations_ratio_message" { + description = "Custom message for the GCP Pub/Sub Unavailable Sending Operations Ratio monitor" + type = string + default = "" +} + +variable "unavailable_sending_operations_ratio_time_aggregator" { + description = "Time aggregator for the GCP Pub/Sub 
Unavailable Sending Operations Ratio monitor" + type = string + default = "sum" +} + +variable "unavailable_sending_operations_ratio_timeframe" { + description = "Timeframe for the GCP Pub/Sub Unavailable Sending Operations Ratio monitor" + type = string + default = "last_10m" +} + +variable "unavailable_sending_operations_ratio_threshold_warning" { + description = "Warning threshold (%) for the ratio of unavailable sending operations" + type = string + default = 10 +} + +variable "unavailable_sending_operations_ratio_threshold_critical" { + description = "Critical threshold (%) for the ratio of unavailable sending operations" + type = string + default = 20 +} + +variable "unavailable_sending_operations_ratio_enabled" { + description = "Flag to enable GCP Pub/Sub Unavailable Sending Operations Ratio monitor" + type = string + default = "true" +} + +variable "unavailable_sending_operations_ratio_extra_tags" { + description = "Extra tags for GCP Pub/Sub Unavailable Sending Operations Ratio monitor" + type = list(string) + default = [] +} diff --git a/cloud/gcp/pubsub/topic/monitors-topics.tf b/cloud/gcp/pubsub/topic/monitors-topics.tf new file mode 100644 index 0000000..927e664 --- /dev/null +++ b/cloud/gcp/pubsub/topic/monitors-topics.tf @@ -0,0 +1,111 @@ +# +# Sending Operations Count +# +resource "datadog_monitor" "sending_operations_count" { + count = var.sending_operations_count_enabled == "true" ? 1 : 0 + name = "${var.prefix_slug == "" ? 
"" : "[${var.prefix_slug}]"}[${var.environment}] GCP Pub/Sub Topic sending messages operations {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = coalesce(var.sending_operations_count_message, var.message) + type = "query alert" + + query = <= ${var.unavailable_sending_operations_count_threshold_critical} +EOQ + + thresholds = { + warning = var.unavailable_sending_operations_count_threshold_warning + critical = var.unavailable_sending_operations_count_threshold_critical + } + + evaluation_delay = var.evaluation_delay + new_host_delay = var.new_host_delay + notify_audit = false + locked = false + timeout_h = 0 + include_tags = true + require_full_window = false + notify_no_data = false + renotify_interval = 0 + + tags = concat(["env:${var.environment}", "type:cloud", "provider:gcp", "resource:pubsub", "pubsub_category:topic", "team:claranet", "created-by:terraform"], var.unavailable_sending_operations_count_extra_tags) + + lifecycle { + ignore_changes = ["silenced"] + } +} + +# +# Unavailable Sending Operations Ratio +# +resource "datadog_monitor" "unavailable_sending_operations_ratio" { + count = var.unavailable_sending_operations_ratio_enabled == "true" ? 1 : 0 + name = "${var.prefix_slug == "" ? 
"" : "[${var.prefix_slug}]"}[${var.environment}] GCP Pub/Sub Topic ratio of sending messages with result unavailable {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = coalesce(var.unavailable_sending_operations_ratio_message, var.message) + type = "query alert" + + query = <= ${var.unavailable_sending_operations_ratio_threshold_critical} +EOQ + + thresholds = { + warning = var.unavailable_sending_operations_ratio_threshold_warning + critical = var.unavailable_sending_operations_ratio_threshold_critical + } + + evaluation_delay = var.evaluation_delay + new_host_delay = var.new_host_delay + notify_audit = false + locked = false + timeout_h = 0 + include_tags = true + require_full_window = false + notify_no_data = false + renotify_interval = 0 + + tags = concat(["env:${var.environment}", "type:cloud", "provider:gcp", "resource:pubsub", "pubsub_category:topic", "team:claranet", "created-by:terraform"], var.unavailable_sending_operations_ratio_extra_tags) + + lifecycle { + ignore_changes = ["silenced"] + } +} diff --git a/cloud/gcp/pubsub/outputs.tf b/cloud/gcp/pubsub/topic/outputs.tf similarity index 64% rename from cloud/gcp/pubsub/outputs.tf rename to cloud/gcp/pubsub/topic/outputs.tf index a08edb8..4452e91 100644 --- a/cloud/gcp/pubsub/outputs.tf +++ b/cloud/gcp/pubsub/topic/outputs.tf @@ -8,3 +8,8 @@ output "unavailable_sending_operations_count_id" { value = datadog_monitor.unavailable_sending_operations_count.*.id } +output "unavailable_sending_operations_ratio_id" { + description = "id for monitor unavailable_sending_operations_ratio" + value = datadog_monitor.unavailable_sending_operations_ratio.*.id +} +