Add Memorystore Redis monitor for system memory usage ratio

This commit is contained in:
Patrick Decat 2021-01-27 14:10:16 +01:00
parent ab489a846c
commit 34323c6565
7 changed files with 201 additions and 0 deletions

View File

@ -203,6 +203,8 @@ For example, this will regenerate every READMEs thanks to [terraform-docs](https
- [gce](https://github.com/claranet/terraform-datadog-monitors/tree/master/cloud/gcp/gce/)
- [instance](https://github.com/claranet/terraform-datadog-monitors/tree/master/cloud/gcp/gce/instance/)
- [lb](https://github.com/claranet/terraform-datadog-monitors/tree/master/cloud/gcp/lb/)
- [memorystore](https://github.com/claranet/terraform-datadog-monitors/tree/master/cloud/gcp/memorystore/)
- [redis](https://github.com/claranet/terraform-datadog-monitors/tree/master/cloud/gcp/memorystore/redis/)
- [pubsub](https://github.com/claranet/terraform-datadog-monitors/tree/master/cloud/gcp/pubsub/)
- [subscription](https://github.com/claranet/terraform-datadog-monitors/tree/master/cloud/gcp/pubsub/subscription/)
- [topic](https://github.com/claranet/terraform-datadog-monitors/tree/master/cloud/gcp/pubsub/topic/)

View File

@ -0,0 +1 @@
cloud-gcp

View File

@ -0,0 +1,59 @@
# CLOUD GCP MEMORYSTORE REDIS DataDog monitors
## How to use this module
```hcl
module "datadog-monitors-cloud-gcp-memorystore-redis" {
source = "claranet/monitors/datadog//cloud/gcp/memorystore/redis"
version = "{revision}"
environment = var.environment
message = module.datadog-message-alerting.alerting-message
}
```
## Purpose
Creates DataDog monitors with the following checks:
- Memorystore Redis system memory usage ratio
## Requirements
| Name | Version |
|------|---------|
| terraform | >= 0.12.26 |
## Inputs
| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| environment | Architecture environment | `string` | n/a | yes |
| evaluation\_delay | Delay in seconds for the metric evaluation | `number` | `900` | no |
| filter\_tags | Tags used for filtering | `string` | `"*"` | no |
| message | Message sent when a monitor is triggered | `any` | n/a | yes |
| new\_host\_delay | Delay in seconds for the new host evaluation | `number` | `300` | no |
| notify\_no\_data | Will raise no data alert if set to true | `bool` | `true` | no |
| prefix\_slug | Prefix string to prepend between brackets on every monitors names | `string` | `""` | no |
| system\_memory\_usage\_ratio\_enabled | Flag to enable GCP Memorystore Redis System memory usage ratio monitor | `string` | `"true"` | no |
| system\_memory\_usage\_ratio\_extra\_tags | Extra tags for GCP Memorystore Redis System memory usage ratio monitor | `list(string)` | `[]` | no |
| system\_memory\_usage\_ratio\_message | Custom message for Memorystore Redis System memory usage ratio monitor | `string` | `""` | no |
| system\_memory\_usage\_ratio\_no\_data\_timeframe | Number of minutes before reporting no data | `string` | `20` | no |
| system\_memory\_usage\_ratio\_threshold\_critical | Memorystore Redis System memory usage ratio critical threshold | `string` | `90` | no |
| system\_memory\_usage\_ratio\_threshold\_warning | Memorystore Redis System memory usage ratio warning threshold | `string` | `80` | no |
| system\_memory\_usage\_ratio\_time\_aggregator | Time aggregator for Memorystore Redis System memory usage ratio monitor | `string` | `"min"` | no |
| system\_memory\_usage\_ratio\_timeframe | Timeframe for Memorystore Redis System memory usage ratio monitor | `string` | `"last_10m"` | no |
## Outputs
| Name | Description |
|------|-------------|
| system\_memory\_usage\_ratio\_id | id for monitor system\_memory\_usage\_ratio |
## Related documentation
* [GCP Metrics for Memorystore Redis](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-redis)
* [Datadog integration for GCP Memorystore Redis](https://docs.datadoghq.com/integrations/google_cloud_redis/)
* [Memory management best practices](https://cloud.google.com/memorystore/docs/redis/memory-management-best-practices#monitor_your_instances_memory_usage)
* [Monitoring System memory usage ratio](https://cloud.google.com/memorystore/docs/redis/memory-management-best-practices#system_memory_usage_ratio_2)

View File

@ -0,0 +1,89 @@
#
# Datadog global variables
#
# Environment name; rendered in the monitor title and in the "env:" tag.
variable "environment" {
  description = "Architecture environment"
  type        = string
}

# Scope expression placed between the braces of the metric query.
variable "filter_tags" {
  description = "Tags used for filtering"
  default     = "*"
}

# Default notification body; used when no monitor-specific message is set.
variable "message" {
  description = "Message sent when a monitor is triggered"
}

# Forwarded to the monitor's evaluation_delay argument.
variable "evaluation_delay" {
  description = "Delay in seconds for the metric evaluation"
  default     = 900
}

# Forwarded to the monitor's new_host_delay argument.
variable "new_host_delay" {
  description = "Delay in seconds for the new host evaluation"
  default     = 300
}

# When non-empty, rendered as "[<prefix_slug>]" in front of the monitor name.
variable "prefix_slug" {
  description = "Prefix string to prepend between brackets on every monitors names"
  default     = ""
}

# Forwarded to the monitor's notify_no_data argument.
variable "notify_no_data" {
  description = "Will raise no data alert if set to true"
  default     = true
}

# Specific to the system memory usage ratio monitor (its no_data_timeframe),
# even though it is declared alongside the global variables.
variable "system_memory_usage_ratio_no_data_timeframe" {
  description = "Number of minutes before reporting no data"
  type        = string
  default     = 20
}
#
# System memory usage ratio
#
# Overrides the global message for this monitor; the monitor falls back to
# var.message through coalesce() when this is left empty.
variable "system_memory_usage_ratio_message" {
  description = "Custom message for Memorystore Redis System memory usage ratio monitor"
  type        = string
  default     = ""
}

# Aggregation function wrapped around the query window (first token of the query).
variable "system_memory_usage_ratio_time_aggregator" {
  description = "Time aggregator for Memorystore Redis System memory usage ratio monitor"
  type        = string
  default     = "min"
}

# Evaluation window passed as the argument of the time aggregator.
variable "system_memory_usage_ratio_timeframe" {
  description = "Timeframe for Memorystore Redis System memory usage ratio monitor"
  type        = string
  default     = "last_10m"
}

# Used as the "warning" entry of the monitor thresholds.
variable "system_memory_usage_ratio_threshold_warning" {
  description = "Memorystore Redis System memory usage ratio warning threshold"
  type        = string
  default     = 80
}

# Used both in the query comparison and as the "critical" threshold entry.
variable "system_memory_usage_ratio_threshold_critical" {
  description = "Memorystore Redis System memory usage ratio critical threshold"
  type        = string
  default     = 90
}

# String flag (not a bool): the monitor is created only when this equals "true".
variable "system_memory_usage_ratio_enabled" {
  description = "Flag to enable GCP Memorystore Redis System memory usage ratio monitor"
  type        = string
  default     = "true"
}

# Appended to the fixed tag list set on the monitor.
variable "system_memory_usage_ratio_extra_tags" {
  description = "Extra tags for GCP Memorystore Redis System memory usage ratio monitor"
  type        = list(string)
  default     = []
}

View File

@ -0,0 +1,37 @@
#
# System memory usage ratio
#
# Alerts when the system memory usage of a Memorystore Redis node stays above
# the configured thresholds over the evaluation window.
#
# The thresholds (defaults 80 / 90) are expressed in percent, whereas
# gcp.redis.stats.memory.system_memory_usage_ratio is documented as a fraction
# of maximum system memory (0..1). The query therefore scales the metric by 100
# before comparing; without the scaling the monitor could never trigger.
# NOTE(review): confirm the metric unit against the Datadog integration page
# for your account before rollout.
resource "datadog_monitor" "system_memory_usage_ratio" {
  # Created only when the string flag is exactly "true".
  count = var.system_memory_usage_ratio_enabled == "true" ? 1 : 0

  # Title carries the compared values; the unit is "%" to match the scaled query
  # (it previously showed "s", which does not apply to a ratio).
  name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Memorystore Redis system memory usage ratio {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"

  # Monitor-specific message wins; coalesce() falls back to the global message.
  message = coalesce(var.system_memory_usage_ratio_message, var.message)
  type    = "metric alert"

  # One evaluation group per (instance_id, node_id, role).
  # "<<-" strips the common leading indentation from the heredoc lines.
  query = <<-EOQ
    ${var.system_memory_usage_ratio_time_aggregator}(${var.system_memory_usage_ratio_timeframe}):
    avg:gcp.redis.stats.memory.system_memory_usage_ratio{${var.filter_tags}} by {instance_id,node_id,role}
    * 100 > ${var.system_memory_usage_ratio_threshold_critical}
  EOQ

  thresholds = {
    critical = var.system_memory_usage_ratio_threshold_critical
    warning  = var.system_memory_usage_ratio_threshold_warning
  }

  evaluation_delay    = var.evaluation_delay
  new_host_delay      = var.new_host_delay
  notify_audit        = false
  locked              = false
  timeout_h           = 0
  include_tags        = true
  require_full_window = false
  notify_no_data      = var.notify_no_data
  no_data_timeframe   = var.system_memory_usage_ratio_no_data_timeframe
  renotify_interval   = 0

  tags = concat(["env:${var.environment}", "type:cloud", "provider:gcp", "resource:memorystore", "team:claranet", "created-by:terraform", "engine:redis"], var.system_memory_usage_ratio_extra_tags)

  # Changes to "silenced" are ignored so Terraform does not revert them.
  lifecycle {
    ignore_changes = [silenced]
  }
}

View File

@ -0,0 +1,5 @@
# Exposes the monitor id(s). The resource is declared with "count", so the
# value is a list, which is empty when the monitor is disabled.
output "system_memory_usage_ratio_id" {
  description = "id for monitor system_memory_usage_ratio"
  value       = datadog_monitor.system_memory_usage_ratio[*].id
}

View File

@ -0,0 +1,8 @@
# Terraform settings for this module: minimum CLI version and provider source.
terraform {
  # NOTE(review): presumably pinned to 0.12.26 because that is the first 0.12
  # release accepting "source" in required_providers — confirm.
  required_version = ">= 0.12.26"

  required_providers {
    datadog = {
      # NOTE(review): "terraform-providers/datadog" looks like the legacy
      # registry namespace; check whether the repository should move to the
      # vendor namespace (this must be changed consistently across modules).
      source = "terraform-providers/datadog"
    }
  }
}