MON-247-monitors-for-ark-backups: Add ark_schedules_monitor
This commit is contained in:
parent
7a1b42b16a
commit
df866e4d4c
@ -75,6 +75,7 @@ The `//` is very important, it's a terraform specific syntax used to separate gi
|
||||
- [caas](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/)
|
||||
- [kubernetes](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/kubernetes/)
|
||||
- [ingress](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/kubernetes/ingress/)
|
||||
- [ark](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/k8s/ark/)
|
||||
- [cloud](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/)
|
||||
- [aws](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/)
|
||||
- [alb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/alb/)
|
||||
|
||||
83
caas/k8s/ark/README.md
Normal file
83
caas/k8s/ark/README.md
Normal file
@ -0,0 +1,83 @@
|
||||
Ark schedules monitor
|
||||
==========================================
|
||||
|
||||
How to use this module
|
||||
----------------------
|
||||
|
||||
```
|
||||
module "ark_schedules_monitor" {
|
||||
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//caas/k8s/ark?ref={revision}"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${module.datadog-message-alerting.alerting-message}"
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
Purpose
|
||||
-------
|
||||
Creates a DataDog monitor with the following check:
|
||||
|
||||
* Ark schedules monitors
|
||||
|
||||
Inputs
|
||||
------
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| ark_schedules_monitor_message | Custom message for Ark schedules monitor | string | `` | no |
|
||||
| ark_schedules_monitor_no_data_timeframe | No data timeframe in minutes | string | `1440` | no |
|
||||
| ark_schedules_monitor_silenced | Groups to mute for Ark schedules monitor | map | `<map>` | no |
|
||||
| ark_schedules_monitor_timeframe | Monitor timeframe for Ark schedules monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_1d` | no |
|
||||
| delay | Delay in seconds for the metric evaluation | string | `60` | no |
|
||||
| environment | Architecture environment | string | - | yes |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||
| team | Team tag value used in the monitor name and tags | string | `k8s` | no |
|
||||
|
||||
Outputs
|
||||
-------
|
||||
|
||||
| Name | Description |
|
||||
|------|-------------|
|
||||
| ark_schedules_monitor_id | id for monitor ark_schedules_monitor |
|
||||
|
||||
Related documentation
|
||||
---------------------
|
||||
|
||||
* DataDog blog: https://www.datadoghq.com/blog/monitor-prometheus-metrics
* Heptio Ark minimum release: https://github.com/heptio/ark/releases/tag/v0.9.0
|
||||
|
||||
Ark annotations for Datadog
|
||||
---------------------------
|
||||
```
|
||||
apiVersion: apps/v1beta1
kind: Deployment
metadata:
  namespace: heptio-ark
  name: ark
spec:
  replicas: 1
  template:
    metadata:
      labels:
        component: ark
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8085"
        prometheus.io/path: "/metrics"
        ad.datadoghq.com/ark.check_names: |-
          ["prometheus"]
        ad.datadoghq.com/ark.init_configs: |-
          [{}]
        ad.datadoghq.com/ark.instances: |-
          [
            {
              "prometheus_url": "http://%%host%%:8085/metrics",
              "namespace": "ark",
              "metrics": ["ark_backup_*"],
              "tags": ["dd_monitoring:enabled","dd_k8s:enabled","env:prod"]
            }
          ]
|
||||
```
|
||||
55
caas/k8s/ark/inputs.tf
Normal file
55
caas/k8s/ark/inputs.tf
Normal file
@ -0,0 +1,55 @@
|
||||
# Datadog global variables
|
||||
|
||||
# Team owning the monitor. The value is interpolated into the monitor name
# ("[env] [team] ...") and into its "team:<value>" tag.
variable "team" {
  description = "Team tag value used in the monitor name and tags"
  type        = "string"
  default     = "k8s"
}
|
||||
|
||||
# Required. Interpolated into the monitor name, the "env:<value>" monitor tag and,
# when the default filter tags are in use, the "env:<value>" part of the query filter.
variable "environment" {
  description = "Architecture environment"
  type        = "string"
}
|
||||
|
||||
# Compared against the literal string "true" when the query filter is built:
# "true" selects the default dd_monitoring/dd_k8s/env tags, any other value
# makes the module use filter_tags_custom instead.
variable "filter_tags_use_defaults" {
  description = "Use default filter tags convention"
  type        = "string"
  default     = "true"
}
|
||||
|
||||
# Raw tag filter placed between the braces of the monitor query when
# filter_tags_use_defaults is not "true". The default "*" matches every source.
variable "filter_tags_custom" {
  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
  type        = "string"
  default     = "*"
}
|
||||
|
||||
# Required. Notification text used by the monitor whenever
# ark_schedules_monitor_message is left empty.
variable "message" {
  description = "Message sent when a monitor is triggered"
  type        = "string"
}
|
||||
|
||||
# Applied to both evaluation_delay and new_host_delay of the monitor.
variable "delay" {
  description = "Delay in seconds for the metric evaluation"
  type        = "string"
  default     = 60
}
|
||||
|
||||
# Datadog monitors variables
|
||||
# Ark schedules monitor
|
||||
|
||||
# Optional override for the notification text of the Ark schedules monitor.
# When left empty (the default), the module-wide "message" variable is used.
variable "ark_schedules_monitor_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Ark schedules monitor"
}
|
||||
|
||||
# Evaluation window of the monitor; inserted verbatim into the query as
# "sum(<timeframe>):...".
variable "ark_schedules_monitor_timeframe" {
  type        = "string"
  default     = "last_1d"
  description = "Monitor timeframe for Ark schedules monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
|
||||
|
||||
# Passed straight to the monitor's "silenced" argument; empty by default.
variable "ark_schedules_monitor_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Ark schedules monitor"
}
|
||||
|
||||
# Forwarded to the monitor's no_data_timeframe argument, which Datadog expresses
# in minutes; the default of 1440 minutes equals the default "last_1d" window.
variable "ark_schedules_monitor_no_data_timeframe" {
  description = "No data timeframe in minutes"
  type        = "string"
  default     = 1440
}
|
||||
39
caas/k8s/ark/monitors-ark.tf
Normal file
39
caas/k8s/ark/monitors-ark.tf
Normal file
@ -0,0 +1,39 @@
|
||||
# Builds the tag filter that the monitor query places between its braces.
# With filter_tags_use_defaults set to "true" the filter is the default
# dd_monitoring/dd_k8s convention scoped to the environment; otherwise the
# caller-supplied filter_tags_custom value is used as-is.
data "template_file" "filter" {
  # "$$" escapes the interpolation so the template itself is the literal "${filter}".
  template = "$${filter}"

  vars {
    filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_k8s:enabled,env:%s", var.environment) : var.filter_tags_custom}"
  }
}
|
||||
|
||||
# Metric monitor on Ark backup failures, grouped by the "schedule" tag so that
# each schedule is evaluated (and named, via {{schedule.name}}) separately.
resource "datadog_monitor" "ark_schedules_monitor" {
  type = "metric alert"
  name = "[${var.environment}] [${var.team}] Ark backup failed on {{schedule.name}}"

  # The monitor-specific message is used when set; otherwise the module-wide one.
  message = "${coalesce(var.ark_schedules_monitor_message, var.message)}"

  # Failure count per schedule over the configured timeframe, restricted to the
  # rendered tag filter. The comparison value must stay equal to the critical threshold.
  query = <<EOF
sum(${var.ark_schedules_monitor_timeframe}):min:ark.ark_backup_failure_total{${data.template_file.filter.rendered}} by {schedule}.as_count() > 1
EOF

  thresholds {
    warning  = 0
    critical = 1
  }

  # Timing: the same delay value feeds both the evaluation delay and the new-host delay.
  new_host_delay      = "${var.delay}"
  evaluation_delay    = "${var.delay}"
  require_full_window = false
  timeout_h           = 0

  # Missing-data handling.
  notify_no_data    = true
  no_data_timeframe = "${var.ark_schedules_monitor_no_data_timeframe}"

  # Notification behaviour.
  include_tags      = true
  renotify_interval = 0
  notify_audit      = false
  locked            = false

  tags     = ["team:${var.team}", "env:${var.environment}", "resource:ark", "provider:prometheus"]
  silenced = "${var.ark_schedules_monitor_silenced}"
}
|
||||
4
caas/k8s/ark/outputs.tf
Normal file
4
caas/k8s/ark/outputs.tf
Normal file
@ -0,0 +1,4 @@
|
||||
# Exposes the id of the datadog_monitor.ark_schedules_monitor resource to callers of the module.
output "ark_schedules_monitor_id" {
  value       = "${datadog_monitor.ark_schedules_monitor.id}"
  description = "id for monitor ark_schedules_monitor"
}
|
||||
Loading…
x
Reference in New Issue
Block a user