Merged in MON-247-monitors-for-ark-backups (pull request #107)

MON-247 monitors for ark backups

Approved-by: Alex Lemaresquier <alex+bitbucket@lemaresquier.org>
Approved-by: Boris Rousseau <boris.rousseau@morea.fr>
Approved-by: Christophe GENINET <christophe.geninet@fr.clara.net>
This commit is contained in:
Alex Lemaresquier 2018-09-03 08:40:48 +00:00 committed by Quentin Manfroi
commit cde7ec8845
6 changed files with 190 additions and 0 deletions

View File

@ -74,6 +74,7 @@ The `//` is very important, it's a terraform specific syntax used to separate gi
- [caas](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/)
- [kubernetes](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/kubernetes/)
- [ark](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/kubernetes/ark/)
- [ingress](https://bitbucket.org/morea/terraform.feature.datadog/src/master/caas/kubernetes/ingress/)
- [cloud](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/)
- [aws](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/)

View File

@ -0,0 +1,81 @@
# CAAS KUBERNETES ARK DataDog monitors
## How to use this module
```
module "datadog-monitors-caas-kubernetes-ark" {
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//caas/kubernetes/ark?ref={revision}"
environment = "${var.environment}"
message = "${module.datadog-message-alerting.alerting-message}"
}
```
## Purpose
Creates DataDog monitors with the following checks:
- Ark backup failed
## Inputs
| Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:|
| ark_schedules_enabled | Flag to enable Ark schedules monitor | string | `true` | no |
| ark_schedules_extra_tags | Extra tags for Ark schedules monitor | list | `<list>` | no |
| ark_schedules_monitor_message | Custom message for Ark schedules monitor | string | `` | no |
| ark_schedules_monitor_no_data_timeframe | No data timeframe in minutes | string | `1440` | no |
| ark_schedules_monitor_silenced | Groups to mute for Ark schedules monitor | map | `<map>` | no |
| ark_schedules_monitor_timeframe | Monitor timeframe for Ark schedules monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_1d` | no |
| environment | Architecture environment | string | - | yes |
| evaluation_delay | Delay in seconds for the metric evaluation | string | `15` | no |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| message | Message sent when a monitor is triggered | string | - | yes |
| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no |
## Outputs
| Name | Description |
|------|-------------|
| ark_schedules_monitor_id | id for monitor ark_schedules_monitor |
Related documentation
---------------------
DataDog blog: https://www.datadoghq.com/blog/monitor-prometheus-metrics
Heptio Ark minimum release: https://github.com/heptio/ark/releases/tag/v0.9.0
Ark annotations for Datadog
---------------------------
```yaml
apiVersion: apps/v1beta1
kind: Deployment
metadata:
  namespace: heptio-ark
  name: ark
spec:
  replicas: 1
  template:
    metadata:
      labels:
        component: ark
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8085"
        prometheus.io/path: "/metrics"
        ad.datadoghq.com/ark.check_names: |-
          ["prometheus"]
        ad.datadoghq.com/ark.init_configs: |-
          [{}]
        ad.datadoghq.com/ark.instances: |-
          [
            {
              "prometheus_url": "http://%%host%%:8085/metrics",
              "namespace": "ark",
              "metrics": ["ark_backup_*"],
              "tags": ["dd_monitoring:enabled","dd_ark:enabled","env:prod"]
            }
          ]
```

View File

@ -0,0 +1,66 @@
# Datadog global variables
# Required input (no default): the environment name.
# Forwarded to the filter-tags module and interpolated into the monitor's
# name prefix and its "env:" tag.
variable "environment" {
  description = "Architecture environment"
}
# Switch handed straight to the filter-tags module; defaults to "true".
variable "filter_tags_use_defaults" {
  default     = "true"
  description = "Use default filter tags convention"
}
# Custom tag filter handed straight to the filter-tags module.
# Defaults to "*"; per the description it applies when
# filter_tags_use_defaults is false.
variable "filter_tags_custom" {
  default     = "*"
  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
}
# Required input (no default): the shared notification message.
# The monitor uses it as the fallback argument of coalesce(), after
# ark_schedules_monitor_message.
variable "message" {
  description = "Message sent when a monitor is triggered"
}
# Value assigned to the monitor's evaluation_delay attribute (seconds, per
# the description). Defaults to 15.
variable "evaluation_delay" {
  default     = 15
  description = "Delay in seconds for the metric evaluation"
}
# Value assigned to the monitor's new_host_delay attribute (seconds, per
# the description). Defaults to 300.
variable "new_host_delay" {
  default     = 300
  description = "Delay in seconds before monitor new resource"
}
# Datadog monitors variables
# Optional message override for the Ark schedules monitor.
# It is the first argument of coalesce() in the monitor, so the empty
# default lets the shared var.message be used instead.
variable "ark_schedules_monitor_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Ark schedules monitor"
}
# Time window interpolated into the monitor query as `sum(<timeframe>):...`.
# Defaults to "last_1d".
variable "ark_schedules_monitor_timeframe" {
  type        = "string"
  default     = "last_1d"
  description = "Monitor timeframe for Ark schedules monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
# Map assigned to the monitor's `silenced` attribute.
# The empty default mutes nothing.
variable "ark_schedules_monitor_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Ark schedules monitor"
}
# On/off switch for the Ark schedules monitor.
# The monitor's `count` evaluates this as a conditional (1 when true, 0 otherwise).
variable "ark_schedules_enabled" {
  type        = "string"
  default     = "true"
  description = "Flag to enable Ark schedules monitor"
}
# Additional tags interpolated after the fixed tags in the monitor's `tags` list.
# Empty by default.
variable "ark_schedules_extra_tags" {
  type        = "list"
  default     = []
  description = "Extra tags for Ark schedules monitor"
}
# Value assigned to the Ark schedules monitor's no_data_timeframe attribute
# (minutes, per the description); the monitor sets notify_no_data = true.
# Defaults to 1440.
# The type is declared explicitly, like the other monitor variables in this
# file; the numeric default is converted to a string by Terraform.
variable "ark_schedules_monitor_no_data_timeframe" {
  description = "No data timeframe in minutes"
  type        = "string"
  default     = 1440
}

View File

@ -0,0 +1,8 @@
# Instantiates the shared filter-tags module for the "ark" resource.
# The monitor query reads this module's `query_alert` output.
module "filter-tags" {
  source = "../../../common/filter-tags"

  resource    = "ark"
  environment = "${var.environment}"

  filter_tags_custom       = "${var.filter_tags_custom}"
  filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
}

View File

@ -0,0 +1,30 @@
# Datadog metric alert on the ark.ark_backup_failure_total metric, grouped by
# the `schedule` tag. Created only when var.ark_schedules_enabled is true.
resource "datadog_monitor" "ark_schedules_monitor" {
  # Zero or one instance, driven by the enable flag.
  count = "${var.ark_schedules_enabled ? 1 : 0}"

  type = "metric alert"
  name = "[${var.environment}] Ark backup failed"

  # The monitor-specific message comes first; the shared message is the fallback.
  message = "${coalesce(var.ark_schedules_monitor_message, var.message)}"

  # The comparison value in the query equals the critical threshold below.
  # Heredoc body and terminator stay at column 0 so the query text is unchanged.
  query = <<EOF
sum(${var.ark_schedules_monitor_timeframe}):min:ark.ark_backup_failure_total${module.filter-tags.query_alert} by {schedule}.as_count() > 1
EOF

  thresholds {
    warning  = 0
    critical = 1
  }

  # No-data handling.
  notify_no_data    = true
  no_data_timeframe = "${var.ark_schedules_monitor_no_data_timeframe}"

  # Evaluation timing.
  evaluation_delay    = "${var.evaluation_delay}"
  new_host_delay      = "${var.new_host_delay}"
  require_full_window = false
  timeout_h           = 0

  # Notification behaviour.
  renotify_interval = 0
  notify_audit      = false
  include_tags      = true
  locked            = false

  silenced = "${var.ark_schedules_monitor_silenced}"

  # Fixed tags first, then the caller-supplied extra tags.
  tags = [
    "env:${var.environment}",
    "type:caas",
    "provider:prometheus",
    "resource:ark",
    "team:claranet",
    "created-by:terraform",
    "${var.ark_schedules_extra_tags}"
  ]
}

View File

@ -0,0 +1,4 @@
# Exposes the id of the Ark schedules monitor.
# A splat expression is used because the resource is created with `count`
# (zero instances when the monitor is disabled).
output "ark_schedules_monitor_id" {
  value       = "${datadog_monitor.ark_schedules_monitor.*.id}"
  description = "id for monitor ark_schedules_monitor"
}