MON-46 Add default on all queries. Remove no data from invocations. Improve naming.
This commit is contained in:
parent
4d39f320f7
commit
f2cf760628
@ -16,9 +16,9 @@ module "datadog-monitors-cloud-aws-lambda" {
|
|||||||
|
|
||||||
Creates DataDog monitors with the following checks:
|
Creates DataDog monitors with the following checks:
|
||||||
|
|
||||||
- Lambda Number of Errors
|
- Lambda Invocations throttled due to concurrent limit reached
|
||||||
- Lambda Number of Invocations (disabled by default)
|
- Lambda Number of errors
|
||||||
- Lambda Number of Throttles
|
- Lambda Number of invocations (disabled by default)
|
||||||
- Lambda Percentage of errors
|
- Lambda Percentage of errors
|
||||||
|
|
||||||
## Inputs
|
## Inputs
|
||||||
@ -44,7 +44,7 @@ Creates DataDog monitors with the following checks:
|
|||||||
| invocations\_threshold\_critical | Alerting threshold in number of invocations | string | `"1"` | no |
|
| invocations\_threshold\_critical | Alerting threshold in number of invocations | string | `"1"` | no |
|
||||||
| invocations\_threshold\_warning | Warning threshold in number of invocations | string | `"2"` | no |
|
| invocations\_threshold\_warning | Warning threshold in number of invocations | string | `"2"` | no |
|
||||||
| invocations\_time\_aggregator | Monitor aggregator for Invocations [available values: min, max, sum or avg] | string | `"sum"` | no |
|
| invocations\_time\_aggregator | Monitor aggregator for Invocations [available values: min, max, sum or avg] | string | `"sum"` | no |
|
||||||
| invocations\_timeframe | Monitor timeframe for Invocations [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_1h"` | no |
|
| invocations\_timeframe | Monitor timeframe for Invocations [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_30m"` | no |
|
||||||
| message | Message sent when a monitor is triggered | string | n/a | yes |
|
| message | Message sent when a monitor is triggered | string | n/a | yes |
|
||||||
| new\_host\_delay | Delay in seconds before monitoring a new resource | string | `"300"` | no |
|
| new\_host\_delay | Delay in seconds before monitoring a new resource | string | `"300"` | no |
|
||||||
| pct\_errors\_enabled | Flag to enable Percentage of errors monitor | string | `"true"` | no |
|
| pct\_errors\_enabled | Flag to enable Percentage of errors monitor | string | `"true"` | no |
|
||||||
|
|||||||
@ -192,7 +192,7 @@ variable "invocations_time_aggregator" {
|
|||||||
variable "invocations_timeframe" {
|
variable "invocations_timeframe" {
|
||||||
description = "Monitor timeframe for Invocations [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
description = "Monitor timeframe for Invocations [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||||
type = string
|
type = string
|
||||||
default = "last_1h"
|
default = "last_30m"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "invocations_threshold_critical" {
|
variable "invocations_threshold_critical" {
|
||||||
|
|||||||
@ -7,10 +7,11 @@ resource "datadog_monitor" "pct_errors" {
|
|||||||
|
|
||||||
query = <<EOQ
|
query = <<EOQ
|
||||||
${var.pct_errors_time_aggregator}(${var.pct_errors_timeframe}):
|
${var.pct_errors_time_aggregator}(${var.pct_errors_timeframe}):
|
||||||
(sum:aws.lambda.errors${module.filter-tags.query_alert} by {region,functionname}.as_count()
|
default(
|
||||||
|
(default(sum:aws.lambda.errors${module.filter-tags.query_alert} by {region,functionname}.as_count(),0)
|
||||||
/
|
/
|
||||||
sum:aws.lambda.invocations${module.filter-tags.query_alert} by {region,functionname}.as_count())
|
default(sum:aws.lambda.invocations${module.filter-tags.query_alert} by {region,functionname}.as_count(),1))
|
||||||
* 100
|
* 100,0)
|
||||||
> ${var.pct_errors_threshold_critical}
|
> ${var.pct_errors_threshold_critical}
|
||||||
EOQ
|
EOQ
|
||||||
|
|
||||||
@ -40,13 +41,13 @@ resource "datadog_monitor" "pct_errors" {
|
|||||||
# Errors Absolute Value
|
# Errors Absolute Value
|
||||||
resource "datadog_monitor" "errors" {
|
resource "datadog_monitor" "errors" {
|
||||||
count = var.errors_enabled == "true" ? 1 : 0
|
count = var.errors_enabled == "true" ? 1 : 0
|
||||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Lambda Number of Errors {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}"
|
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Lambda Number of errors {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}"
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
message = coalesce(var.errors_message, var.message)
|
message = coalesce(var.errors_message, var.message)
|
||||||
|
|
||||||
query = <<EOQ
|
query = <<EOQ
|
||||||
${var.errors_time_aggregator}(${var.errors_timeframe}):
|
${var.errors_time_aggregator}(${var.errors_timeframe}):
|
||||||
sum:aws.lambda.errors${module.filter-tags.query_alert} by {region,functionname}.as_count()
|
default(sum:aws.lambda.errors${module.filter-tags.query_alert} by {region,functionname}.as_count(),0)
|
||||||
> ${var.errors_threshold_critical}
|
> ${var.errors_threshold_critical}
|
||||||
EOQ
|
EOQ
|
||||||
|
|
||||||
@ -76,13 +77,13 @@ resource "datadog_monitor" "errors" {
|
|||||||
# Throttles
|
# Throttles
|
||||||
resource "datadog_monitor" "throttles" {
|
resource "datadog_monitor" "throttles" {
|
||||||
count = var.throttles_enabled == "true" ? 1 : 0
|
count = var.throttles_enabled == "true" ? 1 : 0
|
||||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Lambda Number of Throttles {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}"
|
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Lambda Invocations throttled due to concurrent limit reached {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}"
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
message = coalesce(var.throttles_message, var.message)
|
message = coalesce(var.throttles_message, var.message)
|
||||||
|
|
||||||
query = <<EOQ
|
query = <<EOQ
|
||||||
${var.throttles_time_aggregator}(${var.throttles_timeframe}):
|
${var.throttles_time_aggregator}(${var.throttles_timeframe}):
|
||||||
sum:aws.lambda.throttles${module.filter-tags.query_alert} by {region,functionname}.as_count()
|
default(sum:aws.lambda.throttles${module.filter-tags.query_alert} by {region,functionname}.as_count(),0)
|
||||||
> ${var.throttles_threshold_critical}
|
> ${var.throttles_threshold_critical}
|
||||||
EOQ
|
EOQ
|
||||||
|
|
||||||
@ -112,14 +113,14 @@ resource "datadog_monitor" "throttles" {
|
|||||||
# INVOCATIONS
|
# INVOCATIONS
|
||||||
resource "datadog_monitor" "invocations" {
|
resource "datadog_monitor" "invocations" {
|
||||||
count = var.invocations_enabled == "true" ? 1 : 0
|
count = var.invocations_enabled == "true" ? 1 : 0
|
||||||
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Lambda Number of Invocations {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}"
|
name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Lambda Number of invocations {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}"
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
message = coalesce(var.invocations_message, var.message)
|
message = coalesce(var.invocations_message, var.message)
|
||||||
|
|
||||||
query = <<EOQ
|
query = <<EOQ
|
||||||
${var.invocations_time_aggregator}(${var.invocations_timeframe}):
|
${var.invocations_time_aggregator}(${var.invocations_timeframe}):
|
||||||
sum:aws.lambda.invocations${module.filter-tags.query_alert} by {region,functionname}.as_count()
|
default(sum:aws.lambda.invocations${module.filter-tags.query_alert} by {region,functionname}.as_count(),0)
|
||||||
< ${var.invocations_threshold_critical}
|
<= ${var.invocations_threshold_critical}
|
||||||
EOQ
|
EOQ
|
||||||
|
|
||||||
evaluation_delay = var.evaluation_delay
|
evaluation_delay = var.evaluation_delay
|
||||||
@ -130,8 +131,7 @@ resource "datadog_monitor" "invocations" {
|
|||||||
warning = var.invocations_threshold_warning
|
warning = var.invocations_threshold_warning
|
||||||
}
|
}
|
||||||
|
|
||||||
notify_no_data = true
|
notify_no_data = false
|
||||||
no_data_timeframe = var.invocations_no_data_timeframe
|
|
||||||
require_full_window = false
|
require_full_window = false
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user