From 02cdaef1faf6c59cff65a80e298b9828618effc5 Mon Sep 17 00:00:00 2001 From: "gauthier.ampe@fr.clara.net" Date: Wed, 11 Sep 2019 10:12:31 +0200 Subject: [PATCH 1/3] MON-221 add zookeeper monitors --- README.md | 1 + database/zookeeper/README.md | 56 +++++++++++ database/zookeeper/inputs.tf | 114 +++++++++++++++++++++++ database/zookeeper/modules.tf | 20 ++++ database/zookeeper/monitors-zookeeper.tf | 58 ++++++++++++ database/zookeeper/outputs.tf | 10 ++ 6 files changed, 259 insertions(+) create mode 100644 database/zookeeper/README.md create mode 100644 database/zookeeper/inputs.tf create mode 100644 database/zookeeper/modules.tf create mode 100644 database/zookeeper/monitors-zookeeper.tf create mode 100644 database/zookeeper/outputs.tf diff --git a/README.md b/README.md index 44a6e0b..f4cd71f 100644 --- a/README.md +++ b/README.md @@ -219,3 +219,4 @@ module "datadog-monitors-system-generic" { - [system](https://github.com/claranet/terraform-datadog-monitors/tree/master/system/) - [generic](https://github.com/claranet/terraform-datadog-monitors/tree/master/system/generic/) - [unreachable](https://github.com/claranet/terraform-datadog-monitors/tree/master/system/unreachable/) + diff --git a/database/zookeeper/README.md b/database/zookeeper/README.md new file mode 100644 index 0000000..547d125 --- /dev/null +++ b/database/zookeeper/README.md @@ -0,0 +1,56 @@ +# DATABASE ZOOKEEPER DataDog monitors + +## How to use this module + +``` +module "datadog-monitors-database-zookeeper" { + source = "git::ssh://git@git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors.git//database/zookeeper?ref={revision}" + + environment = var.environment + message = module.datadog-message-alerting.alerting-message +} + +``` + +## Purpose + +Creates DataDog monitors with the following checks: + +- Zookeeper latency +- Zookeeper process is down + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| environment | Architecture environment | string | n/a | yes | +| evaluation\_delay | Delay in seconds for the metric evaluation | string | `"15"` | no | +| filter\_tags\_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `"*"` | no | +| filter\_tags\_custom\_excluded | Tags excluded for custom filtering when filter_tags_use_defaults is false | string | `""` | no | +| filter\_tags\_use\_defaults | Use default filter tags convention | string | `"true"` | no | +| message | Message sent when a monitor is triggered | string | n/a | yes | +| new\_host\_delay | Delay in seconds before begin to monitor new host | string | `"300"` | no | +| prefix\_slug | Prefix string to prepend between brackets on every monitors names | string | `""` | no | +| zookeeper\_latency\_availability\_extra\_tags | Extra tags for zookeeper read latency monitor | list(string) | `[]` | no | +| zookeeper\_latency\_enabled | Flag to enable Zookeeper read latency monitor | string | `"true"` | no | +| zookeeper\_latency\_status\_message | Custom message for Zookeeper read latency monitor | string | `""` | no | +| zookeeper\_latency\_threshold\_critical | Maximum critical acceptable ms of zookeeper latency monitor | string | `"300000"` | no | +| zookeeper\_latency\_threshold\_warning | Maximum warning acceptable ms of zookeeper latency monitor | string | `"250000"` | no | +| zookeeper\_latency\_time\_aggregator | Monitor time aggregator for Zookeeper read latency monitor [available values: min, max or avg] | string | `"avg"` | no | +| zookeeper\_latency\_timeframe | Monitor timeframe for Zookeeper read latency monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_15m"` | no | +| zookeeper\_process\_enabled | Flag to enable Zookeeper does not respond monitor | string | `"true"` | no | +| zookeeper\_process\_extra\_tags | Extra tags for Zookeeper does not respond monitor | list(string) | `[]` | no | +| zookeeper\_process\_message | Custom message for Zookeeper does not respond monitor | string | `""` | no | +| zookeeper\_process\_time\_aggregator | Time aggregator for the Zookeeper does not respond monitor | string | `"avg"` | no | +| zookeeper\_process\_timeframe | Timeframe for the does not respond monitor | string | `"last_10m"` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| datadog\_monitor\_zookeeper\_latency\_id | id for monitor datadog_monitor_zookeeper_latency | +| datadog\_zookeeper\_process\_down\_id | id for monitor datadog_zookeeper_process_down | + +## Related documentation +* [Integration Datadog & ElasticSearch](https://docs.datadoghq.com/integrations/elastic/) +* [How to monitor ElasticSearch with Datadog](https://www.datadoghq.com/blog/monitor-elasticsearch-datadog/) diff --git a/database/zookeeper/inputs.tf b/database/zookeeper/inputs.tf new file mode 100644 index 0000000..42c0b04 --- /dev/null +++ b/database/zookeeper/inputs.tf @@ -0,0 +1,114 @@ +# +# Datadog global variables +# +variable "environment" { + description = "Architecture environment" + type = string +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +variable "filter_tags_custom_excluded" { + description = "Tags excluded for custom filtering when filter_tags_use_defaults is false" + default = "" +} + +variable "message" { + description = "Message sent when a monitor is triggered" +} + +variable "evaluation_delay" { + description = "Delay in seconds for the metric evaluation" + default = 15 +} + +variable "new_host_delay" { + description = "Delay in seconds before begin to monitor new host" + default = 300 +} + +variable "prefix_slug" { + description = "Prefix string to prepend between brackets on every monitors names" + default = "" +} + +# Service Check +variable "zookeeper_process_enabled" { + description = "Flag to enable Zookeeper does not respond monitor" + type = string + default = "true" +} + +variable "zookeeper_process_message" { + description = "Custom message for Zookeeper does not respond monitor" + type = string + default = "" +} + +variable "zookeeper_process_time_aggregator" { + description = "Time aggregator for the Zookeeper does not respond monitor" + type = string + default = "avg" +} + +variable "zookeeper_process_timeframe" { + description = "Timeframe for the does not respond monitor" + type = string + default = "last_10m" +} + +variable "zookeeper_process_extra_tags" { + description = "Extra tags for Zookeeper does not respond monitor" + type = list(string) + default = [] +} + + +## Check read latency monitor +variable "zookeeper_latency_enabled" { + description = "Flag to enable Zookeeper read latency monitor" + type = string + default = "true" +} + +variable "zookeeper_latency_status_message" { + description = "Custom message for Zookeeper read latency monitor" + type = string + default = "" +} + +variable "zookeeper_latency_time_aggregator" { + description = "Monitor time aggregator for Zookeeper read latency monitor [available values: min, max or avg]" + type = string + default = "avg" +} + +variable "zookeeper_latency_timeframe" { + description = "Monitor timeframe for Zookeeper read latency monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = string + default = "last_15m" +} + +variable "zookeeper_latency_threshold_critical" { + description = "Maximum critical acceptable ms of zookeeper latency monitor" + default = 300000 +} + +variable "zookeeper_latency_threshold_warning" { + description = "Maximum warning acceptable ms of zookeeper latency monitor" + default = 250000 +} + +variable "zookeeper_latency_availability_extra_tags" { + description = "Extra tags for zookeeper read latency monitor" + type = list(string) + default = [] +} diff --git a/database/zookeeper/modules.tf b/database/zookeeper/modules.tf new file mode 100644 index 0000000..6b17d21 --- /dev/null +++ b/database/zookeeper/modules.tf @@ -0,0 +1,20 @@ +module "filter-tags" { + source = "../../common/filter-tags" + + environment = var.environment + resource = "zookeeper" + filter_tags_use_defaults = var.filter_tags_use_defaults + filter_tags_custom = var.filter_tags_custom + filter_tags_custom_excluded = var.filter_tags_custom_excluded +} + +module "filter-tags-check-process" { + source = "../../common/filter-tags" + + environment = var.environment + resource = "zookeeper" + filter_tags_use_defaults = var.filter_tags_use_defaults + filter_tags_custom = var.filter_tags_custom + filter_tags_custom_excluded = var.filter_tags_custom_excluded + extra_tags = ["dd_process_name:zookeeper"] +} diff --git a/database/zookeeper/monitors-zookeeper.tf b/database/zookeeper/monitors-zookeeper.tf new file mode 100644 index 0000000..ff25ac4 --- /dev/null +++ b/database/zookeeper/monitors-zookeeper.tf @@ -0,0 +1,58 @@ +resource "datadog_monitor" "datadog_zookeeper_process_down" { + count = var.zookeeper_process_enabled == "true" ? 1 : 0 + name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Zookeeper process is down" + message = coalesce(var.zookeeper_process_message, var.message) + type = "metric alert" + + query = < ${var.zookeeper_latency_threshold_critical} +EOQ + + thresholds = { + warning = var.zookeeper_latency_threshold_warning + critical = var.zookeeper_latency_threshold_critical + } + + notify_no_data = false + evaluation_delay = 15 + new_host_delay = 300 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + + tags = concat(["env:${var.environment}", "type:database", "provider:zookeeper", "resource:zookeeper", "team:claranet", "created-by:terraform"], var.zookeeper_latency_availability_extra_tags) + + lifecycle { + ignore_changes = ["silenced"] + } +} diff --git a/database/zookeeper/outputs.tf b/database/zookeeper/outputs.tf new file mode 100644 index 0000000..bdc52b3 --- /dev/null +++ b/database/zookeeper/outputs.tf @@ -0,0 +1,10 @@ +output "datadog_monitor_zookeeper_latency_id" { + description = "id for monitor datadog_monitor_zookeeper_latency" + value = datadog_monitor.datadog_monitor_zookeeper_latency.*.id +} + +output "datadog_zookeeper_process_down_id" { + description = "id for monitor datadog_zookeeper_process_down" + value = datadog_monitor.datadog_zookeeper_process_down.*.id +} + From ce25b90c59de311d29c06df999f731ea7953485d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20MARMOL?= Date: Fri, 24 Jan 2020 15:31:31 +0100 Subject: [PATCH 2/3] MON-221 change process to service check --- README.md | 2 +- database/zookeeper/README.md | 63 ++++++++++++-------- database/zookeeper/inputs.tf | 76 +++++++++++++++++++++--- database/zookeeper/monitors-zookeeper.tf | 55 +++++++++-------- database/zookeeper/outputs.tf | 6 +- 5 files changed, 141 insertions(+), 61 deletions(-) diff --git a/README.md b/README.md index f4cd71f..b62e5e7 100644 --- a/README.md +++ b/README.md @@ -203,6 +203,7 @@ module "datadog-monitors-system-generic" { - [redis](https://github.com/claranet/terraform-datadog-monitors/tree/master/database/redis/) - [solr](https://github.com/claranet/terraform-datadog-monitors/tree/master/database/solr/) - [sqlserver](https://github.com/claranet/terraform-datadog-monitors/tree/master/database/sqlserver/) + - [zookeeper](https://github.com/claranet/terraform-datadog-monitors/tree/master/database/zookeeper/) - [middleware](https://github.com/claranet/terraform-datadog-monitors/tree/master/middleware/) - [apache](https://github.com/claranet/terraform-datadog-monitors/tree/master/middleware/apache/) - [kong](https://github.com/claranet/terraform-datadog-monitors/tree/master/middleware/kong/) @@ -219,4 +220,3 @@ module "datadog-monitors-system-generic" { - [system](https://github.com/claranet/terraform-datadog-monitors/tree/master/system/) - [generic](https://github.com/claranet/terraform-datadog-monitors/tree/master/system/generic/) - [unreachable](https://github.com/claranet/terraform-datadog-monitors/tree/master/system/unreachable/) - diff --git a/database/zookeeper/README.md b/database/zookeeper/README.md index 547d125..2109ada 100644 --- a/database/zookeeper/README.md +++ b/database/zookeeper/README.md @@ -2,9 +2,10 @@ ## How to use this module -``` +```hcl module "datadog-monitors-database-zookeeper" { - source = "git::ssh://git@git.fr.clara.net/claranet/pt-monitoring/projects/datadog/terraform/monitors.git//database/zookeeper?ref={revision}" + source = "claranet/monitors/datadog//database/zookeeper" + version = "{revision}" environment = var.environment message = module.datadog-message-alerting.alerting-message @@ -17,39 +18,49 @@ module "datadog-monitors-database-zookeeper" { Creates DataDog monitors with the following checks: - Zookeeper latency -- Zookeeper process is down +- Zookeeper service does not respond ## Inputs | Name | Description | Type | Default | Required | -|------|-------------|:----:|:-----:|:-----:| -| environment | Architecture environment | string | n/a | yes | -| evaluation\_delay | Delay in seconds for the metric evaluation | string | `"15"` | no | -| filter\_tags\_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `"*"` | no | -| filter\_tags\_custom\_excluded | Tags excluded for custom filtering when filter_tags_use_defaults is false | string | `""` | no | -| filter\_tags\_use\_defaults | Use default filter tags convention | string | `"true"` | no | -| message | Message sent when a monitor is triggered | string | n/a | yes | -| new\_host\_delay | Delay in seconds before begin to monitor new host | string | `"300"` | no | -| prefix\_slug | Prefix string to prepend between brackets on every monitors names | string | `""` | no | -| zookeeper\_latency\_availability\_extra\_tags | Extra tags for zookeeper read latency monitor | list(string) | `[]` | no | -| zookeeper\_latency\_enabled | Flag to enable Zookeeper read latency monitor | string | `"true"` | no | -| zookeeper\_latency\_status\_message | Custom message for Zookeeper read latency monitor | string | `""` | no | -| zookeeper\_latency\_threshold\_critical | Maximum critical acceptable ms of zookeeper latency monitor | string | `"300000"` | no | -| zookeeper\_latency\_threshold\_warning | Maximum warning acceptable ms of zookeeper latency monitor | string | `"250000"` | no | -| zookeeper\_latency\_time\_aggregator | Monitor time aggregator for Zookeeper read latency monitor [available values: min, max or avg] | string | `"avg"` | no | -| zookeeper\_latency\_timeframe | Monitor timeframe for Zookeeper read latency monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_15m"` | no | -| zookeeper\_process\_enabled | Flag to enable Zookeeper does not respond monitor | string | `"true"` | no | -| zookeeper\_process\_extra\_tags | Extra tags for Zookeeper does not respond monitor | list(string) | `[]` | no | -| zookeeper\_process\_message | Custom message for Zookeeper does not respond monitor | string | `""` | no | -| zookeeper\_process\_time\_aggregator | Time aggregator for the Zookeeper does not respond monitor | string | `"avg"` | no | -| zookeeper\_process\_timeframe | Timeframe for the does not respond monitor | string | `"last_10m"` | no | +|------|-------------|------|---------|:-----:| +| environment | Architecture environment | `string` | n/a | yes | +| evaluation\_delay | Delay in seconds for the metric evaluation | `number` | `15` | no | +| filter\_tags\_custom | Tags used for custom filtering when filter\_tags\_use\_defaults is false | `string` | `"*"` | no | +| filter\_tags\_custom\_excluded | Tags excluded for custom filtering when filter\_tags\_use\_defaults is false | `string` | `""` | no | +| filter\_tags\_use\_defaults | Use default filter tags convention | `string` | `"true"` | no | +| message | Message sent when a monitor is triggered | `any` | n/a | yes | +| new\_host\_delay | Delay in seconds before begin to monitor new host | `number` | `300` | no | +| not\_responding\_group\_by | List of tags to use to group data | `list(string)` |
[
"host",
"server"
]
| no | +| not\_responding\_locked | Lock Zookeeper not responding monitor | `bool` | `false` | no | +| not\_responding\_no\_data\_timeframe | Zookeeper monitor no\_data\_timeframe | `number` | `10` | no | +| not\_responding\_notify\_audit | Enable or not notify audit on Zookeeper not responding monitor | `bool` | `false` | no | +| not\_responding\_notify\_no\_data | Send notification if not\_responding monitor does not retrieve data | `bool` | `true` | no | +| not\_responding\_threshold\_warning | Zookeeper not responding limit (warning threshold) | `number` | `3` | no | +| not\_responding\_timeout\_h | Number of hour of Zookeeper not responding monitor not reporting data before it will automatically resolve from a triggered state | `number` | `0` | no | +| prefix\_slug | Prefix string to prepend between brackets on every monitors names | `string` | `""` | no | +| zookeeper\_latency\_availability\_extra\_tags | Extra tags for zookeeper read latency monitor | `list(string)` | `[]` | no | +| zookeeper\_latency\_enabled | Flag to enable Zookeeper read latency monitor | `string` | `"true"` | no | +| zookeeper\_latency\_group\_by | Tags to use to group datas | `list(string)` |
[
"host"
]
| no | +| zookeeper\_latency\_notify\_audit | Enable or not notify audit on Zookeeper latency monitor | `bool` | `false` | no | +| zookeeper\_latency\_status\_message | Custom message for Zookeeper read latency monitor | `string` | `""` | no | +| zookeeper\_latency\_threshold\_critical | Maximum critical acceptable ms of zookeeper latency monitor | `number` | `300000` | no | +| zookeeper\_latency\_threshold\_warning | Maximum warning acceptable ms of zookeeper latency monitor | `number` | `250000` | no | +| zookeeper\_latency\_time\_aggregator | Monitor time aggregator for Zookeeper read latency monitor [available values: min, max or avg] | `string` | `"avg"` | no | +| zookeeper\_latency\_timeframe | Monitor timeframe for Zookeeper read latency monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | `string` | `"last_15m"` | no | +| zookeeper\_latency\_timeout\_h | Number of hour of Zookeeper latency monitor not reporting data before it will automatically resolve from a triggered state | `number` | `0` | no | +| zookeeper\_not\_responding\_enabled | Flag to enable Zookeeper does not respond monitor | `string` | `"true"` | no | +| zookeeper\_not\_responding\_extra\_tags | Extra tags for Zookeeper does not respond monitor | `list(string)` | `[]` | no | +| zookeeper\_not\_responding\_message | Custom message for Zookeeper does not respond monitor | `string` | `""` | no | +| zookeeper\_not\_responding\_time\_aggregator | Time aggregator for the Zookeeper does not respond monitor | `string` | `"avg"` | no | +| zookeeper\_not\_responding\_timeframe | Timeframe for the does not respond monitor | `string` | `"last_5m"` | no | ## Outputs | Name | Description | |------|-------------| -| datadog\_monitor\_zookeeper\_latency\_id | id for monitor datadog_monitor_zookeeper_latency | -| datadog\_zookeeper\_process\_down\_id | id for monitor datadog_zookeeper_process_down | +| datadog\_monitor\_zookeeper\_latency\_id | id for monitor datadog\_monitor\_zookeeper\_latency | +| not\_responding\_id | id for monitor not\_responding | ## Related documentation * [Integration Datadog & ElasticSearch](https://docs.datadoghq.com/integrations/elastic/) diff --git a/database/zookeeper/inputs.tf b/database/zookeeper/inputs.tf index 42c0b04..73ddf01 100644 --- a/database/zookeeper/inputs.tf +++ b/database/zookeeper/inputs.tf @@ -40,45 +40,95 @@ variable "prefix_slug" { default = "" } +# # Service Check -variable "zookeeper_process_enabled" { +# +variable "not_responding_notify_no_data" { + default = true + type = bool + description = "Send notification if not_responding monitor does not retrieve data" +} +variable "not_responding_no_data_timeframe" { + default = 10 + description = "Zookeeper monitor no_data_timeframe" + type = number +} + +variable "not_responding_group_by" { + default = ["host", "server"] + type = list(string) + description = "List of tags to use to group data" +} + +variable "zookeeper_not_responding_enabled" { description = "Flag to enable Zookeeper does not respond monitor" type = string default = "true" } -variable "zookeeper_process_message" { +variable "zookeeper_not_responding_message" { description = "Custom message for Zookeeper does not respond monitor" type = string default = "" } -variable "zookeeper_process_time_aggregator" { +variable "zookeeper_not_responding_time_aggregator" { description = "Time aggregator for the Zookeeper does not respond monitor" type = string default = "avg" } -variable "zookeeper_process_timeframe" { +variable "zookeeper_not_responding_timeframe" { description = "Timeframe for the does not respond monitor" type = string - default = "last_10m" + default = "last_5m" } -variable "zookeeper_process_extra_tags" { +variable "zookeeper_not_responding_extra_tags" { description = "Extra tags for Zookeeper does not respond monitor" type = list(string) default = [] } +variable "not_responding_threshold_warning" { + default = 3 + type = number + description = "Zookeeper not responding limit (warning threshold)" +} -## Check read latency monitor +variable "not_responding_notify_audit" { + description = "Enable or not notify audit on Zookeeper not responding monitor" + type = bool + default = false +} + +variable "not_responding_locked" { + description = "Lock Zookeeper not responding monitor" + type = bool + default = false +} + +variable "not_responding_timeout_h" { + description = "Number of hour of Zookeeper not responding monitor not reporting data before it will automatically resolve from a triggered state" + type = number + default = 0 +} + +# +# Check read latency monitor +# variable "zookeeper_latency_enabled" { description = "Flag to enable Zookeeper read latency monitor" type = string default = "true" } +variable "zookeeper_latency_group_by" { + description = "Tags to use to group datas" + type = list(string) + default = ["host"] +} + variable "zookeeper_latency_status_message" { description = "Custom message for Zookeeper read latency monitor" type = string @@ -112,3 +162,15 @@ variable "zookeeper_latency_availability_extra_tags" { type = list(string) default = [] } + +variable "zookeeper_latency_notify_audit" { + description = "Enable or not notify audit on Zookeeper latency monitor" + type = bool + default = false +} + +variable "zookeeper_latency_timeout_h" { + description = "Number of hour of Zookeeper latency monitor not reporting data before it will automatically resolve from a triggered state" + type = number + default = 0 +} diff --git a/database/zookeeper/monitors-zookeeper.tf b/database/zookeeper/monitors-zookeeper.tf index ff25ac4..d6ee28e 100644 --- a/database/zookeeper/monitors-zookeeper.tf +++ b/database/zookeeper/monitors-zookeeper.tf @@ -1,39 +1,45 @@ -resource "datadog_monitor" "datadog_zookeeper_process_down" { - count = var.zookeeper_process_enabled == "true" ? 1 : 0 - name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Zookeeper process is down" - message = coalesce(var.zookeeper_process_message, var.message) - type = "metric alert" +resource "datadog_monitor" "not_responding" { + count = var.zookeeper_not_responding_enabled ? 1 : 0 + name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Zookeeper service does not respond {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = coalesce(var.zookeeper_not_responding_message, var.message) + type = "service check" query = < ${var.zookeeper_latency_threshold_critical} + zookeeper.avg_latency${module.filter-tags.query_alert} by {${join(",", var.zookeeper_latency_group_by)}}) > ${var.zookeeper_latency_threshold_critical} EOQ thresholds = { @@ -42,17 +48,18 @@ EOQ } notify_no_data = false - evaluation_delay = 15 - new_host_delay = 300 - notify_audit = false - timeout_h = 0 + evaluation_delay = var.evaluation_delay + new_host_delay = var.new_host_delay + notify_audit = var.zookeeper_latency_notify_audit + timeout_h = var.zookeeper_latency_timeout_h include_tags = true locked = false require_full_window = true - tags = concat(["env:${var.environment}", "type:database", "provider:zookeeper", "resource:zookeeper", "team:claranet", "created-by:terraform"], var.zookeeper_latency_availability_extra_tags) + tags = concat(["env:${var.environment}", "type:database", "provider:zookeeper", "resource:zookeeper", "team:claranet", + "created-by:terraform"], var.zookeeper_latency_availability_extra_tags) lifecycle { - ignore_changes = ["silenced"] + ignore_changes = [silenced] } } diff --git a/database/zookeeper/outputs.tf b/database/zookeeper/outputs.tf index bdc52b3..f9403da 100644 --- a/database/zookeeper/outputs.tf +++ b/database/zookeeper/outputs.tf @@ -3,8 +3,8 @@ output "datadog_monitor_zookeeper_latency_id" { value = datadog_monitor.datadog_monitor_zookeeper_latency.*.id } -output "datadog_zookeeper_process_down_id" { - description = "id for monitor datadog_zookeeper_process_down" - value = datadog_monitor.datadog_zookeeper_process_down.*.id +output "not_responding_id" { + description = "id for monitor not_responding" + value = datadog_monitor.not_responding.*.id } From 32173e586c1fd2e8c5ea8760072295f60d583929 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Mon, 30 Mar 2020 13:56:47 +0200 Subject: [PATCH 3/3] MON-221 apply some conventions --- database/zookeeper/README.md | 9 ++------ database/zookeeper/inputs.tf | 29 ------------------------ database/zookeeper/modules.tf | 10 -------- database/zookeeper/monitors-zookeeper.tf | 10 ++++---- 4 files changed, 7 insertions(+), 51 deletions(-) diff --git a/database/zookeeper/README.md b/database/zookeeper/README.md index 2109ada..5db9669 100644 --- a/database/zookeeper/README.md +++ b/database/zookeeper/README.md @@ -32,23 +32,18 @@ Creates DataDog monitors with the following checks: | message | Message sent when a monitor is triggered | `any` | n/a | yes | | new\_host\_delay | Delay in seconds before begin to monitor new host | `number` | `300` | no | | not\_responding\_group\_by | List of tags to use to group data | `list(string)` |
[
"host",
"server"
]
| no | -| not\_responding\_locked | Lock Zookeeper not responding monitor | `bool` | `false` | no | | not\_responding\_no\_data\_timeframe | Zookeeper monitor no\_data\_timeframe | `number` | `10` | no | -| not\_responding\_notify\_audit | Enable or not notify audit on Zookeeper not responding monitor | `bool` | `false` | no | | not\_responding\_notify\_no\_data | Send notification if not\_responding monitor does not retrieve data | `bool` | `true` | no | | not\_responding\_threshold\_warning | Zookeeper not responding limit (warning threshold) | `number` | `3` | no | -| not\_responding\_timeout\_h | Number of hour of Zookeeper not responding monitor not reporting data before it will automatically resolve from a triggered state | `number` | `0` | no | | prefix\_slug | Prefix string to prepend between brackets on every monitors names | `string` | `""` | no | | zookeeper\_latency\_availability\_extra\_tags | Extra tags for zookeeper read latency monitor | `list(string)` | `[]` | no | | zookeeper\_latency\_enabled | Flag to enable Zookeeper read latency monitor | `string` | `"true"` | no | | zookeeper\_latency\_group\_by | Tags to use to group datas | `list(string)` |
[
"host"
]
| no | -| zookeeper\_latency\_notify\_audit | Enable or not notify audit on Zookeeper latency monitor | `bool` | `false` | no | | zookeeper\_latency\_status\_message | Custom message for Zookeeper read latency monitor | `string` | `""` | no | | zookeeper\_latency\_threshold\_critical | Maximum critical acceptable ms of zookeeper latency monitor | `number` | `300000` | no | | zookeeper\_latency\_threshold\_warning | Maximum warning acceptable ms of zookeeper latency monitor | `number` | `250000` | no | | zookeeper\_latency\_time\_aggregator | Monitor time aggregator for Zookeeper read latency monitor [available values: min, max or avg] | `string` | `"avg"` | no | | zookeeper\_latency\_timeframe | Monitor timeframe for Zookeeper read latency monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | `string` | `"last_15m"` | no | -| zookeeper\_latency\_timeout\_h | Number of hour of Zookeeper latency monitor not reporting data before it will automatically resolve from a triggered state | `number` | `0` | no | | zookeeper\_not\_responding\_enabled | Flag to enable Zookeeper does not respond monitor | `string` | `"true"` | no | | zookeeper\_not\_responding\_extra\_tags | Extra tags for Zookeeper does not respond monitor | `list(string)` | `[]` | no | | zookeeper\_not\_responding\_message | Custom message for Zookeeper does not respond monitor | `string` | `""` | no | @@ -63,5 +58,5 @@ Creates DataDog monitors with the following checks: | not\_responding\_id | id for monitor not\_responding | ## Related documentation -* [Integration Datadog & ElasticSearch](https://docs.datadoghq.com/integrations/elastic/) -* [How to monitor ElasticSearch with Datadog](https://www.datadoghq.com/blog/monitor-elasticsearch-datadog/) + +DataDog documentation: [https://docs.datadoghq.com/integrations/zk/](https://docs.datadoghq.com/integrations/zk/) diff --git a/database/zookeeper/inputs.tf b/database/zookeeper/inputs.tf index 73ddf01..3874747 100644 --- a/database/zookeeper/inputs.tf +++ b/database/zookeeper/inputs.tf @@ -96,24 +96,6 @@ variable "not_responding_threshold_warning" { description = "Zookeeper not responding limit (warning threshold)" } -variable "not_responding_notify_audit" { - description = "Enable or not notify audit on Zookeeper not responding monitor" - type = bool - default = false -} - -variable "not_responding_locked" { - description = "Lock Zookeeper not responding monitor" - type = bool - default = false -} - -variable "not_responding_timeout_h" { - description = "Number of hour of Zookeeper not responding monitor not reporting data before it will automatically resolve from a triggered state" - type = number - default = 0 -} - # # Check read latency monitor # @@ -163,14 +145,3 @@ variable "zookeeper_latency_availability_extra_tags" { default = [] } -variable "zookeeper_latency_notify_audit" { - description = "Enable or not notify audit on Zookeeper latency monitor" - type = bool - default = false -} - -variable "zookeeper_latency_timeout_h" { - description = "Number of hour of Zookeeper latency monitor not reporting data before it will automatically resolve from a triggered state" - type = number - default = 0 -} diff --git a/database/zookeeper/modules.tf b/database/zookeeper/modules.tf index 6b17d21..f0cf5b4 100644 --- a/database/zookeeper/modules.tf +++ b/database/zookeeper/modules.tf @@ -8,13 +8,3 @@ module "filter-tags" { filter_tags_custom_excluded = var.filter_tags_custom_excluded } -module "filter-tags-check-process" { - source = "../../common/filter-tags" - - environment = var.environment - resource = "zookeeper" - filter_tags_use_defaults = var.filter_tags_use_defaults - filter_tags_custom = var.filter_tags_custom - filter_tags_custom_excluded = var.filter_tags_custom_excluded - extra_tags = ["dd_process_name:zookeeper"] -} diff --git a/database/zookeeper/monitors-zookeeper.tf b/database/zookeeper/monitors-zookeeper.tf index d6ee28e..884bb6a 100644 --- a/database/zookeeper/monitors-zookeeper.tf +++ b/database/zookeeper/monitors-zookeeper.tf @@ -16,9 +16,9 @@ EOQ no_data_timeframe = var.not_responding_no_data_timeframe new_host_delay = var.new_host_delay notify_no_data = var.not_responding_notify_no_data - notify_audit = var.not_responding_notify_audit - locked = var.not_responding_locked - timeout_h = var.not_responding_timeout_h + notify_audit = false + locked = false + timeout_h = 0 include_tags = true require_full_window = true renotify_interval = 0 @@ -50,8 +50,8 @@ EOQ notify_no_data = false evaluation_delay = var.evaluation_delay new_host_delay = var.new_host_delay - notify_audit = var.zookeeper_latency_notify_audit - timeout_h = var.zookeeper_latency_timeout_h + notify_audit = false + timeout_h = 0 include_tags = true locked = false require_full_window = true