MON-247-monitors-for-ark-backups: Add ark_schedules_monitor

This commit is contained in:
Alex Lemaresquier 2018-07-16 20:07:30 +02:00 committed by Quentin Manfroi
parent 7a1b42b16a
commit df866e4d4c
5 changed files with 182 additions and 0 deletions

View File

@ -75,6 +75,7 @@ The `//` is very important, it's a terraform specific syntax used to separate gi
- [caas](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/)
- [kubernetes](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/kubernetes/)
- [ingress](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/kubernetes/ingress/)
- [ark](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/k8s/ark/)
- [cloud](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/)
- [aws](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/)
- [alb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/alb/)

83
caas/k8s/ark/README.md Normal file
View File

@ -0,0 +1,83 @@
Ark schedules monitor
==========================================
How to use this module
----------------------
```
module "ark_schedules_monitor" {
  source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//caas/k8s/ark?ref={revision}"

  environment = "${var.environment}"
  message     = "${module.datadog-message-alerting.alerting-message}"
}
```
Purpose
-------
Creates a Datadog monitor with the following check:
* Ark schedules monitor
Inputs
------
| Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:|
| ark_schedules_monitor_message | Custom message for Ark schedules monitor | string | `` | no |
| ark_schedules_monitor_no_data_timeframe | No data timeframe in minutes | string | `1440` | no |
| ark_schedules_monitor_silenced | Groups to mute for Ark schedules monitor | map | `<map>` | no |
| ark_schedules_monitor_timeframe | Monitor timeframe for Ark schedules monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_1d` | no |
| delay | Delay in seconds for the metric evaluation | string | `60` | no |
| environment | Architecture environment | string | - | yes |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| message | Message sent when a monitor is triggered | string | - | yes |
| team | Team name, used in the monitor name and in the team tag | string | `k8s` | no |
Outputs
-------
| Name | Description |
|------|-------------|
| ark_schedules_monitor_id | id for monitor ark_schedules_monitor |
Related documentation
---------------------
DataDog blog: https://www.datadoghq.com/blog/monitor-prometheus-metrics
Heptio Ark minimum release: https://github.com/heptio/ark/releases/tag/v0.9.0
Ark annotations for Datadog
---------------------------
```
apiVersion: apps/v1beta1
kind: Deployment
metadata:
  namespace: heptio-ark
  name: ark
spec:
  replicas: 1
  template:
    metadata:
      labels:
        component: ark
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8085"
        prometheus.io/path: "/metrics"
        ad.datadoghq.com/ark.check_names: |-
          ["prometheus"]
        ad.datadoghq.com/ark.init_configs: |-
          [{}]
        ad.datadoghq.com/ark.instances: |-
          [
            {
              "prometheus_url": "http://%%host%%:8085/metrics",
              "namespace": "ark",
              "metrics": ["ark_backup_*"],
              "tags": ["dd_monitoring:enabled","dd_k8s:enabled","env:prod"]
            }
          ]
```

55
caas/k8s/ark/inputs.tf Normal file
View File

@ -0,0 +1,55 @@
# Datadog global variables
# Team owning the monitor. The value is interpolated into the monitor name
# and emitted as the "team:<value>" monitor tag.
variable "team" {
  description = "Team name, used in the monitor name and in the team tag"
  type        = "string"
  default     = "k8s"
}
# Required input: environment name. It is interpolated into the monitor name,
# the "env:" monitor tag and, when the default filter convention is enabled,
# the query filter.
variable "environment" {
  type        = "string"
  description = "Architecture environment"
}
# Switch between the default tag filter and filter_tags_custom.
# Compared against the string "true" when the query filter is built.
variable "filter_tags_use_defaults" {
  type        = "string"
  default     = "true"
  description = "Use default filter tags convention"
}
# Query filter used instead of the default convention when
# filter_tags_use_defaults is not "true". Defaults to "*".
variable "filter_tags_custom" {
  type        = "string"
  default     = "*"
  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
}
# Required input: notification message used by the monitor unless
# ark_schedules_monitor_message is set to a non-empty value.
variable "message" {
  type        = "string"
  description = "Message sent when a monitor is triggered"
}
# Delay in seconds; the monitor uses it for both evaluation_delay and new_host_delay.
variable "delay" {
  default     = 60
  description = "Delay in seconds for the metric evaluation"
}
# Datadog monitors variables
# Ark schedules monitor
# Optional per-monitor message. The empty default lets the monitor fall back
# to the shared "message" variable.
variable "ark_schedules_monitor_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Ark schedules monitor"
}
# Evaluation window inserted into the monitor query as sum(<timeframe>).
variable "ark_schedules_monitor_timeframe" {
  type        = "string"
  default     = "last_1d"
  description = "Monitor timeframe for Ark schedules monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
# Map passed as-is to the monitor's "silenced" attribute. Empty by default.
variable "ark_schedules_monitor_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Ark schedules monitor"
}
# Passed to the monitor's no_data_timeframe, which Datadog expresses in
# minutes; the default of 1440 is therefore 24 hours.
variable "ark_schedules_monitor_no_data_timeframe" {
  description = "No data timeframe in minutes"
  default     = 1440
}

View File

@ -0,0 +1,39 @@
# Builds the tag filter inserted into the monitor query.
# When filter_tags_use_defaults is "true" the default convention is used
# (dd_monitoring:enabled, dd_k8s:enabled and env:<environment>); otherwise
# filter_tags_custom is used verbatim.
data "template_file" "filter" {
  # "$$" escapes the interpolation so that "${filter}" is resolved by the template itself.
  template = "$${filter}"

  vars {
    filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_k8s:enabled,env:%s", var.environment) : var.filter_tags_custom}"
  }
}
# Datadog metric alert on the Ark backup failure counter, grouped by schedule.
resource "datadog_monitor" "ark_schedules_monitor" {
  type = "metric alert"
  name = "[${var.environment}] [${var.team}] Ark backup failed on {{schedule.name}}"

  # The monitor-specific message is used when it is non-empty; otherwise the shared one.
  message = "${coalesce(var.ark_schedules_monitor_message, var.message)}"

  # The comparison value in the query matches the critical threshold declared below.
  query = <<EOF
sum(${var.ark_schedules_monitor_timeframe}):min:ark.ark_backup_failure_total{${data.template_file.filter.rendered}} by {schedule}.as_count() > 1
EOF

  thresholds {
    warning  = 0
    critical = 1
  }

  # Timing: the same delay variable drives both the evaluation delay and the new host delay.
  evaluation_delay  = "${var.delay}"
  new_host_delay    = "${var.delay}"
  no_data_timeframe = "${var.ark_schedules_monitor_no_data_timeframe}"

  # Notification behaviour.
  notify_no_data    = true
  notify_audit      = false
  renotify_interval = 0
  timeout_h         = 0

  # Miscellaneous monitor options.
  include_tags        = true
  locked              = false
  require_full_window = false

  silenced = "${var.ark_schedules_monitor_silenced}"

  tags = ["team:${var.team}", "env:${var.environment}", "resource:ark", "provider:prometheus"]
}

4
caas/k8s/ark/outputs.tf Normal file
View File

@ -0,0 +1,4 @@
# Exposes the id of the Ark schedules monitor to callers of this module.
output "ark_schedules_monitor_id" {
  value       = "${datadog_monitor.ark_schedules_monitor.id}"
  description = "id for monitor ark_schedules_monitor"
}