MON-247-monitors-for-ark-backups: Add ark_schedules_monitor
This commit is contained in:
parent
7a1b42b16a
commit
df866e4d4c
@ -75,6 +75,7 @@ The `//` is very important, it's a terraform specific syntax used to separate gi
|
|||||||
- [caas](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/)
|
- [caas](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/)
|
||||||
- [kubernetes](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/kubernetes/)
|
- [kubernetes](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/kubernetes/)
|
||||||
- [ingress](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/kubernetes/ingress/)
|
- [ingress](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/kubernetes/ingress/)
|
||||||
|
- [ark](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/k8s/ark/)
|
||||||
- [cloud](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/)
|
- [cloud](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/)
|
||||||
- [aws](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/)
|
- [aws](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/)
|
||||||
- [alb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/alb/)
|
- [alb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/alb/)
|
||||||
|
|||||||
83
caas/k8s/ark/README.md
Normal file
83
caas/k8s/ark/README.md
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
Ark schedules monitor
|
||||||
|
==========================================
|
||||||
|
|
||||||
|
How to use this module
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
```
|
||||||
|
module "ark_schedules_monitor" {
|
||||||
|
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//caas/k8s/ark?ref={revision}"
|
||||||
|
|
||||||
|
environment = "${var.environment}"
|
||||||
|
message = "${module.datadog-message-alerting.alerting-message}"
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Purpose
|
||||||
|
-------
|
||||||
|
Creates a DataDog monitor with the following checks:
|
||||||
|
|
||||||
|
* Ark schedules monitors
|
||||||
|
|
||||||
|
Inputs
|
||||||
|
------
|
||||||
|
|
||||||
|
| Name | Description | Type | Default | Required |
|
||||||
|
|------|-------------|:----:|:-----:|:-----:|
|
||||||
|
| ark_schedules_monitor_message | Custom message for Ark schedules monitor | string | `` | no |
|
||||||
|
| ark_schedules_monitor_no_data_timeframe | No data timeframe (in minutes) | string | `1440` | no |
|
||||||
|
| ark_schedules_monitor_silenced | Groups to mute for Ark schedules monitor | map | `<map>` | no |
|
||||||
|
| ark_schedules_monitor_timeframe | Monitor timeframe for Ark schedules monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_1d` | no |
|
||||||
|
| delay | Delay in seconds for the metric evaluation | string | `60` | no |
|
||||||
|
| environment | Architecture environment | string | - | yes |
|
||||||
|
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||||
|
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||||
|
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||||
|
| team | Team name, used in the monitor name and in the `team` tag | string | `k8s` | no |
|
||||||
|
|
||||||
|
Outputs
|
||||||
|
-------
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
|------|-------------|
|
||||||
|
| ark_schedules_monitor_id | id for monitor ark_schedules_monitor |
|
||||||
|
|
||||||
|
Related documentation
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
DataDog blog: https://www.datadoghq.com/blog/monitor-prometheus-metrics
|
||||||
|
Heptio Ark minimum release: https://github.com/heptio/ark/releases/tag/v0.9.0
|
||||||
|
|
||||||
|
Ark annotations for Datadog
|
||||||
|
---------------------------
|
||||||
|
```
|
||||||
|
apiVersion: apps/v1beta1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
namespace: heptio-ark
|
||||||
|
name: ark
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
component: ark
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: "true"
|
||||||
|
prometheus.io/port: "8085"
|
||||||
|
prometheus.io/path: "/metrics"
|
||||||
|
ad.datadoghq.com/ark.check_names: |-
|
||||||
|
["prometheus"]
|
||||||
|
ad.datadoghq.com/ark.init_configs: |-
|
||||||
|
[{}]
|
||||||
|
ad.datadoghq.com/ark.instances: |-
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"prometheus_url": "http://%%host%%:8085/metrics",
|
||||||
|
"namespace": "ark",
|
||||||
|
"metrics": ["ark_backup_*"],
|
||||||
|
"tags": ["dd_monitoring:enabled","dd_k8s:enabled","env:prod"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
55
caas/k8s/ark/inputs.tf
Normal file
55
caas/k8s/ark/inputs.tf
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
# Datadog global variables
|
||||||
|
|
||||||
|
# Owning team; interpolated into the monitor name and its "team:" tag.
variable "team" {
  description = "Team name, used in the monitor name and in the `team` tag"
  type        = "string"
  default     = "k8s"
}
|
||||||
|
|
||||||
|
# Required: no default, so the caller must supply it.
# Used in the monitor name, the default tag filter and the "env:" tag.
variable "environment" {
  type        = "string"
  description = "Architecture environment"
}
|
||||||
|
|
||||||
|
# Compared against the literal string "true" when the query filter is built.
variable "filter_tags_use_defaults" {
  type        = "string"
  default     = "true"
  description = "Use default filter tags convention"
}
|
||||||
|
|
||||||
|
# Only read when filter_tags_use_defaults is not "true".
variable "filter_tags_custom" {
  type        = "string"
  default     = "*"
  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
}
|
||||||
|
|
||||||
|
# Required: fallback notification message, used when
# ark_schedules_monitor_message is left empty.
variable "message" {
  type        = "string"
  description = "Message sent when a monitor is triggered"
}
|
||||||
|
|
||||||
|
# Feeds both evaluation_delay and new_host_delay on the monitor.
variable "delay" {
  default     = 60
  description = "Delay in seconds for the metric evaluation"
}
|
||||||
|
|
||||||
|
# Datadog monitors variables
|
||||||
|
# N/A
|
||||||
|
|
||||||
|
# Optional override; when empty the module-wide "message" is used instead.
variable "ark_schedules_monitor_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Ark schedules monitor"
}
|
||||||
|
|
||||||
|
# Evaluation window placed in front of the query: sum(<timeframe>):...
variable "ark_schedules_monitor_timeframe" {
  type        = "string"
  default     = "last_1d"
  description = "Monitor timeframe for Ark schedules monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
|
||||||
|
|
||||||
|
# Passed straight to the monitor's "silenced" attribute; empty map mutes nothing.
variable "ark_schedules_monitor_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Ark schedules monitor"
}
|
||||||
|
|
||||||
|
# Passed to the monitor's no_data_timeframe.
# NOTE(review): Datadog documents this setting in minutes, which would make
# 1440 one day (same span as the default last_1d timeframe) - confirm.
variable "ark_schedules_monitor_no_data_timeframe" {
  default     = 1440
  description = "No data timeframe"
}
|
||||||
39
caas/k8s/ark/monitors-ark.tf
Normal file
39
caas/k8s/ark/monitors-ark.tf
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# Builds the tag filter placed inside the monitor query's {...} scope:
# the default tag convention for this environment, or the caller's custom tags.
data "template_file" "filter" {
  # "$$" escapes the interpolation so the template itself is "${filter}".
  template = "$${filter}"

  vars {
    filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_k8s:enabled,env:%s", var.environment) : var.filter_tags_custom}"
  }
}
|
||||||
|
|
||||||
|
# Datadog metric monitor on failed Ark backups, evaluated per schedule.
resource "datadog_monitor" "ark_schedules_monitor" {
  type = "metric alert"
  name = "[${var.environment}] [${var.team}] Ark backup failed on {{schedule.name}}"

  # Monitor-specific message when one is set, otherwise the module-wide one.
  message = "${coalesce(var.ark_schedules_monitor_message, var.message)}"

  # Failure counter summed over the configured timeframe, grouped by schedule.
  # The "> 1" comparison value matches thresholds.critical below.
  query = <<EOF
sum(${var.ark_schedules_monitor_timeframe}):min:ark.ark_backup_failure_total{${data.template_file.filter.rendered}} by {schedule}.as_count() > 1
EOF

  thresholds {
    warning  = 0
    critical = 1
  }

  # Timing: the same delay is applied to evaluation and to new hosts.
  evaluation_delay    = "${var.delay}"
  new_host_delay      = "${var.delay}"
  require_full_window = false

  # Missing-data handling.
  notify_no_data    = true
  no_data_timeframe = "${var.ark_schedules_monitor_no_data_timeframe}"

  # Notification behaviour.
  renotify_interval = 0
  timeout_h         = 0
  notify_audit      = false
  include_tags      = true
  locked            = false

  silenced = "${var.ark_schedules_monitor_silenced}"

  tags = [
    "team:${var.team}",
    "env:${var.environment}",
    "resource:ark",
    "provider:prometheus",
  ]
}
|
||||||
4
caas/k8s/ark/outputs.tf
Normal file
4
caas/k8s/ark/outputs.tf
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# Exposes the ID of the monitor created by this module.
output "ark_schedules_monitor_id" {
  value       = "${datadog_monitor.ark_schedules_monitor.id}"
  description = "id for monitor ark_schedules_monitor"
}
|
||||||
Loading…
x
Reference in New Issue
Block a user