From f18ddd957285ef58a6ecc0899993e69a0f99a1d7 Mon Sep 17 00:00:00 2001
From: Rafael Romero Carmona
Date: Tue, 20 Aug 2019 07:55:06 +0100
Subject: [PATCH] MON-39 New Relic monitors: Apdex Score Ratio, Error Rate and No Data.

---
Review notes (commentary only; text in this section is not part of the commit):

* Purpose: adds a saas/new-relic module. The visible part declares the
  shared inputs (environment, delays, message, filter tags), per-monitor
  inputs for "APP Error Rate", "APP Apdex Score" and "Data sent", a
  filter-tags module call, and the datadog_monitor resources
  app_error_rate and app_apdex_score.
* Each monitor is created only when its *_enabled input equals the string
  "true" (count = ... == "true" ? 1 : 0), and its message falls back to
  var.message when the per-monitor message is empty (coalesce).
* Changed in review: lifecycle.ignore_changes in app_error_rate now uses
  the bare attribute reference [silenced] instead of the quoted form
  ["silenced"], which Terraform 0.12 deprecates. The line count is
  unchanged, so the hunk headers and diffstat still match.
* NOTE(review): in this copy of the patch the `query` heredocs are
  incomplete - only "<", the critical-threshold interpolation and the EOQ
  terminator survive. They are reproduced as found; restore the full
  heredoc from the original commit before applying.
* NOTE(review): the "Usage: terraform-docs ..." lines in
  saas/new-relic/README.md look like tool usage output rather than module
  documentation - TODO confirm and regenerate the inputs/outputs section.
* NOTE(review): this copy ends inside the app_apdex_score resource; the
  rest of monitors-new-relic.tf and outputs.tf are not shown here.

 README.md                            |   2 +
 saas/new-relic/README.md             |  29 +++++
 saas/new-relic/inputs.tf             | 176 +++++++++++++++++++++++++++
 saas/new-relic/modules.tf            |   9 ++
 saas/new-relic/monitors-new-relic.tf | 105 ++++++++++++++++
 saas/new-relic/outputs.tf            |  15 +++
 6 files changed, 336 insertions(+)
 create mode 100644 saas/new-relic/README.md
 create mode 100644 saas/new-relic/inputs.tf
 create mode 100644 saas/new-relic/modules.tf
 create mode 100644 saas/new-relic/monitors-new-relic.tf
 create mode 100644 saas/new-relic/outputs.tf

diff --git a/README.md b/README.md
index 3b21e8c..d30b9fe 100644
--- a/README.md
+++ b/README.md
@@ -196,6 +196,8 @@ The `//` is very important, it's a terraform specific syntax used to separate gi
   - [apache](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/middleware/apache/)
   - [nginx](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/middleware/nginx/)
   - [php-fpm](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/middleware/php-fpm/)
+- [saas](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/saas/)
+  - [new-relic](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/saas/new-relic/)
 - [system](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/system/)
   - [generic](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/system/generic/)
   - [unreachable](https://git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors/tree/master/system/unreachable/)
diff --git a/saas/new-relic/README.md b/saas/new-relic/README.md
new file mode 100644
index 0000000..9df0e65
--- /dev/null
+++ b/saas/new-relic/README.md
@@ -0,0 +1,29 @@
+# SAAS NEW-RELIC DataDog monitors
+
+## How to use this module
+
+```
+module "datadog-monitors-saas-new-relic" {
+  source = "git::ssh://git@git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors.git//saas/new-relic?ref={revision}"
+
+  environment = var.environment
+  message     = module.datadog-message-alerting.alerting-message
+}
+
+```
+
+## Purpose
+
+Creates DataDog monitors with the following checks:
+
+- New Relic Apdex score ratio
+- New Relic application has no-data
+- New Relic Error rate
+
+Usage:
+  terraform-docs [--no-required] [json | md | markdown] ...
+  terraform-docs -h | --help
+
+## Related documentation
+
+Datadog Documentation https://docs.datadoghq.com/integrations/new_relic/
\ No newline at end of file
diff --git a/saas/new-relic/inputs.tf b/saas/new-relic/inputs.tf
new file mode 100644
index 0000000..47591fd
--- /dev/null
+++ b/saas/new-relic/inputs.tf
@@ -0,0 +1,176 @@
+# Global Terraform
+variable "environment" {
+  description = "Architecture Environment"
+  type        = string
+}
+
+# Global DataDog
+variable "evaluation_delay" {
+  description = "Delay in seconds for the metric evaluation"
+  default     = 900
+}
+
+variable "new_host_delay" {
+  description = "Delay in seconds before monitor new resource"
+  default     = 300
+}
+
+variable "prefix_slug" {
+  description = "Prefix string to prepend between brackets on every monitors names"
+  default     = ""
+}
+
+variable "message" {
+  description = "Message sent when an alert is triggered"
+}
+
+variable "filter_tags_use_defaults" {
+  description = "Use default filter tags convention"
+  default     = "true"
+}
+
+variable "filter_tags_custom" {
+  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
+  default     = "*"
+}
+
+variable "filter_tags_custom_excluded" {
+  description = "Tags excluded for custom filtering when filter_tags_use_defaults is false"
+  default     = ""
+}
+
+#
+# APP Error Rate
+#
+variable "app_error_rate_enabled" {
+  description = "Flag to enable APP Error Rate monitor"
+  type        = string
+  default     = "true"
+}
+
+variable "app_error_rate_message" {
+  description = "Custom message for the APP Error Rate monitor"
+  type        = string
+  default     = ""
+}
+
+variable "app_error_rate_time_aggregator" {
+  description = "Time aggregator for the APP Error Rate monitor"
+  type        = string
+  default     = "min"
+}
+
+variable "app_error_rate_timeframe" {
+  description = "Timeframe for the APP Error Rate monitor"
+  type        = string
+  default     = "last_5m"
+}
+
+variable "app_error_rate_threshold_warning" {
+  description = "APP Error Rate warning threshold"
+  type        = string
+  default     = 1
+}
+
+variable "app_error_rate_threshold_critical" {
+  description = "APP Error Rate critical threshold"
+  type        = string
+  default     = 5
+}
+
+variable "app_error_rate_extra_tags" {
+  description = "Extra tags for New Relic APP Error Rate monitor"
+  type        = list(string)
+  default     = []
+}
+
+#
+# APP Apdex Score
+#
+variable "app_apdex_score_enabled" {
+  description = "Flag to enable APP Apdex Score monitor"
+  type        = string
+  default     = "true"
+}
+
+variable "app_apdex_score_message" {
+  description = "Custom message for the APP Apdex Score monitor"
+  type        = string
+  default     = ""
+}
+
+variable "app_apdex_score_time_aggregator" {
+  description = "Time aggregator for the APP Apdex Score monitor"
+  type        = string
+  default     = "avg"
+}
+
+variable "app_apdex_score_timeframe" {
+  description = "Timeframe for the APP Apdex Score monitor"
+  type        = string
+  default     = "last_15m"
+}
+
+variable "app_apdex_score_threshold_warning" {
+  description = "APP Apdex Score warning threshold"
+  type        = string
+  default     = 0.5
+}
+
+variable "app_apdex_score_threshold_critical" {
+  description = "APP Apdex Score critical threshold"
+  type        = string
+  default     = 0.25
+}
+
+variable "app_apdex_score_extra_tags" {
+  description = "Extra tags for New Relic APP Apdex Score monitor"
+  type        = list(string)
+  default     = []
+}
+
+#
+# Data sent
+#
+variable "data_sent_enabled" {
+  description = "Flag to enable New Relic Data Sent monitor"
+  type        = string
+  default     = "true"
+}
+
+variable "data_sent_message" {
+  description = "Custom message for the New Relic Data Sent monitor"
+  type        = string
+  default     = ""
+}
+
+variable "data_sent_time_aggregator" {
+  description = "Time aggregator for the New Relic Data Sent monitor"
+  type        = string
+  default     = "avg"
+}
+
+variable "data_sent_timeframe" {
+  description = "Timeframe for the New Relic Data Sent monitor"
+  type        = string
+  default     = "last_15m"
+}
+
+variable "data_sent_threshold_critical" {
+  description = "Critical threshold for the New Relic Data Sent monitor"
+  type        = string
+  default     = 0
+}
+
+variable "data_sent_no_data_timeframe" {
+  description = "No data timeframe for the New Relic Data Sent monitor"
+  type        = string
+  default     = 30
+}
+
+variable "data_sent_extra_tags" {
+  description = "Extra tags for New Relic Data Sent monitor"
+  type        = list(string)
+  default     = []
+}
+
diff --git a/saas/new-relic/modules.tf b/saas/new-relic/modules.tf
new file mode 100644
index 0000000..7e6a157
--- /dev/null
+++ b/saas/new-relic/modules.tf
@@ -0,0 +1,9 @@
+module "filter-tags" {
+  source = "../../common/filter-tags"
+
+  environment                 = var.environment
+  resource                    = "new_relic"
+  filter_tags_use_defaults    = var.filter_tags_use_defaults
+  filter_tags_custom          = var.filter_tags_custom
+  filter_tags_custom_excluded = var.filter_tags_custom_excluded
+}
diff --git a/saas/new-relic/monitors-new-relic.tf b/saas/new-relic/monitors-new-relic.tf
new file mode 100644
index 0000000..fd0f95c
--- /dev/null
+++ b/saas/new-relic/monitors-new-relic.tf
@@ -0,0 +1,105 @@
+resource "datadog_monitor" "app_error_rate" {
+  count   = var.app_error_rate_enabled == "true" ? 1 : 0
+  name    = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] New Relic Error rate {{#is_alert}}{{{comparator}}} {{threshold}}errs/min ({{value}}errs/min){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}errs/min ({{value}}errs/min){{/is_warning}}"
+  message = coalesce(var.app_error_rate_message, var.message)
+  type    = "query alert"
+
+  query = < ${var.app_error_rate_threshold_critical}
+EOQ
+
+  thresholds = {
+    warning  = var.app_error_rate_threshold_warning
+    critical = var.app_error_rate_threshold_critical
+  }
+
+  evaluation_delay    = var.evaluation_delay
+  new_host_delay      = var.new_host_delay
+  notify_no_data      = false
+  renotify_interval   = 0
+  notify_audit        = false
+  timeout_h           = 0
+  include_tags        = true
+  locked              = false
+  require_full_window = false
+
+  tags = concat(["env:${var.environment}", "type:saas", "provider:new-relic", "resource:new-relic", "team:claranet", "created-by:terraform"], var.app_error_rate_extra_tags)
+
+  lifecycle {
+    ignore_changes = [silenced]
+  }
+}
+
+resource "datadog_monitor" "app_apdex_score" {
+  count   = var.app_apdex_score_enabled == "true" ? 1 : 0
+  name    = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] New Relic Apdex score ratio {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
+  message = coalesce(var.app_apdex_score_message, var.message)
+  type    = "query alert"
+
+  query = <