Merge branch 'MON-444_fix_threshold_for_pod_error' into 'master'

MON-444 fix operator and time aggregator for pod error monitor

Closes MON-444

See merge request claranet/pt-monitoring/projects/datadog/terraform/monitors!60
This commit is contained in:
Quentin Manfroi 2019-05-09 18:19:34 +02:00
commit b5012e8182
3 changed files with 3 additions and 3 deletions

View File

@@ -30,7 +30,7 @@ Creates DataDog monitors with the following checks:
| error\_silenced | Groups to mute for Pod errors monitor | map | `{}` | no |
| error\_threshold\_critical | error critical threshold | string | `"0.5"` | no |
| error\_threshold\_warning | error warning threshold | string | `"0"` | no |
-| error\_time\_aggregator | Monitor aggregator for Pod errors [available values: min, max or avg] | string | `"sum"` | no |
+| error\_time\_aggregator | Monitor aggregator for Pod errors [available values: min, max or avg] | string | `"min"` | no |
| error\_timeframe | Monitor timeframe for Pod errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_15m"` | no |
| evaluation\_delay | Delay in seconds for the metric evaluation | string | `"15"` | no |
| filter\_tags\_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `"*"` | no |

View File

@@ -103,7 +103,7 @@ variable "error_message" {
variable "error_time_aggregator" {
description = "Monitor aggregator for Pod errors [available values: min, max or avg]"
type = "string"
-default = "sum"
+default = "min"
}
variable "error_timeframe" {

View File

@@ -37,7 +37,7 @@ resource "datadog_monitor" "error" {
query = <<EOQ
${var.error_time_aggregator}(${var.error_timeframe}):
sum:kubernetes_state.container.status_report.count.waiting${module.filter-tags-nocontainercreating.query_alert} by {namespace,pod,reason}.as_count()
->= ${var.error_threshold_critical}
+> ${var.error_threshold_critical}
EOQ
thresholds {