From 724355b04c0beeab5d18543468ae951cc5d6b5b1 Mon Sep 17 00:00:00 2001 From: Matthieu Bourgain Date: Thu, 22 Oct 2020 17:32:47 +0200 Subject: [PATCH] MN-587 Add ProxySQL monitors --- README.md | 1 + database/proxysql/README.md | 94 +++++++++ database/proxysql/inputs.tf | 266 +++++++++++++++++++++++++ database/proxysql/modules.tf | 10 + database/proxysql/monitors-proxysql.tf | 154 ++++++++++++++ database/proxysql/outputs.tf | 25 +++ database/proxysql/versions.tf | 8 + 7 files changed, 558 insertions(+) create mode 100644 database/proxysql/README.md create mode 100644 database/proxysql/inputs.tf create mode 100644 database/proxysql/modules.tf create mode 100644 database/proxysql/monitors-proxysql.tf create mode 100644 database/proxysql/outputs.tf create mode 100644 database/proxysql/versions.tf diff --git a/README.md b/README.md index bc51184..5afeda6 100644 --- a/README.md +++ b/README.md @@ -214,6 +214,7 @@ For example, this will regenerate every READMEs thanks to [terraform-docs](https - [mongodb](https://github.com/claranet/terraform-datadog-monitors/tree/master/database/mongodb/) - [mysql](https://github.com/claranet/terraform-datadog-monitors/tree/master/database/mysql/) - [postgresql](https://github.com/claranet/terraform-datadog-monitors/tree/master/database/postgresql/) + - [proxysql](https://github.com/claranet/terraform-datadog-monitors/tree/master/database/proxysql/) - [redis](https://github.com/claranet/terraform-datadog-monitors/tree/master/database/redis/) - [solr](https://github.com/claranet/terraform-datadog-monitors/tree/master/database/solr/) - [sqlserver](https://github.com/claranet/terraform-datadog-monitors/tree/master/database/sqlserver/) diff --git a/database/proxysql/README.md b/database/proxysql/README.md new file mode 100644 index 0000000..bb8c13f --- /dev/null +++ b/database/proxysql/README.md @@ -0,0 +1,94 @@ +# DATABASE PROXYSQL DataDog monitors + +## How to use this module + +```hcl +module "datadog-monitors-database-proxysql" { + 
source = "claranet/monitors/datadog//database/proxysql" + version = "{revision}" + + environment = var.environment + message = module.datadog-message-alerting.alerting-message +} + +``` + +## Purpose + +Creates DataDog monitors with the following checks: + +- ProxySQL Client connections aborted +- ProxySQL Pool connections failure +- ProxySQL Server connections aborted +- ProxySQL Slow queries +- ProxySQL Thread Worker + +## Requirements + +| Name | Version | +|------|---------| +| terraform | >= 0.12.26 | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| environment | Environment | `string` | n/a | yes | +| evaluation\_delay | Delay in seconds for the metric evaluation | `number` | `15` | no | +| filter\_tags\_custom | Tags used for custom filtering when filter\_tags\_use\_defaults is false | `string` | `"*"` | no | +| filter\_tags\_custom\_excluded | Tags excluded for custom filtering when filter\_tags\_use\_defaults is false | `string` | `""` | no | +| filter\_tags\_use\_defaults | Use default filter tags convention | `string` | `"true"` | no | +| message | Message sent when an alert is triggered | `any` | n/a | yes | +| new\_host\_delay | Delay in seconds before a newly booted host is evaluated by the monitors | `number` | `300` | no | +| notify\_no\_data | Will raise no data alert if set to true | `bool` | `false` | no | +| prefix\_slug | Prefix string to prepend between brackets on every monitor name | `string` | `""` | no | +| proxysql\_client\_conn\_aborted\_enabled | Flag to enable ProxySQL client connections aborted monitor | `string` | `"true"` | no | +| proxysql\_client\_conn\_aborted\_extra\_tags | Extra tags for ProxySQL client connections aborted monitor | `list(string)` | `[]` | no | +| proxysql\_client\_conn\_aborted\_message | Custom message for ProxySQL client connections aborted monitor | `string` | `""` | no | +| proxysql\_client\_conn\_aborted\_threshold\_critical | Maximum critical acceptable 
percent of aborted connects | `number` | `10` | no | +| proxysql\_client\_conn\_aborted\_threshold\_warning | Maximum warning acceptable percent of aborted connects | `number` | `1` | no | +| proxysql\_client\_conn\_aborted\_time\_aggregator | Monitor time aggregator for ProxySQL client connections aborted monitor [available values: min, max or avg] | `string` | `"avg"` | no | +| proxysql\_client\_conn\_aborted\_timeframe | Monitor timeframe for ProxySQL client connections aborted monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | `string` | `"last_10m"` | no | +| proxysql\_pool\_conn\_failure\_enabled | Flag to enable ProxySQL pool connections failure monitor | `string` | `"true"` | no | +| proxysql\_pool\_conn\_failure\_extra\_tags | Extra tags for ProxySQL pool connections failure monitor | `list(string)` | `[]` | no | +| proxysql\_pool\_conn\_failure\_message | Custom message for ProxySQL pool connections failure monitor | `string` | `""` | no | +| proxysql\_pool\_conn\_failure\_threshold\_critical | Maximum critical acceptable of pool connections failure | `number` | `20` | no | +| proxysql\_pool\_conn\_failure\_threshold\_warning | Maximum warning acceptable of pool connections failure | `number` | `1` | no | +| proxysql\_pool\_conn\_failure\_time\_aggregator | Monitor time aggregator for ProxySQL pool connections failure monitor [available values: min, max or avg] | `string` | `"avg"` | no | +| proxysql\_pool\_conn\_failure\_timeframe | Monitor timeframe for ProxySQL pool connections failure monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | `string` | `"last_15m"` | no | +| proxysql\_server\_conn\_aborted\_enabled | Flag to enable ProxySQL server connections aborted monitor | `string` | `"true"` | no | +| proxysql\_server\_conn\_aborted\_extra\_tags | Extra tags for ProxySQL server connections aborted monitor | `list(string)` | `[]` | no | +| 
proxysql\_server\_conn\_aborted\_message | Custom message for ProxySQL server connections aborted monitor | `string` | `""` | no | +| proxysql\_server\_conn\_aborted\_threshold\_critical | Maximum critical acceptable percent of aborted connects | `number` | `10` | no | +| proxysql\_server\_conn\_aborted\_threshold\_warning | Maximum warning acceptable percent of aborted connects | `number` | `1` | no | +| proxysql\_server\_conn\_aborted\_time\_aggregator | Monitor time aggregator for ProxySQL server connections aborted monitor [available values: min, max or avg] | `string` | `"avg"` | no | +| proxysql\_server\_conn\_aborted\_timeframe | Monitor timeframe for ProxySQL server connections aborted monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | `string` | `"last_10m"` | no | +| proxysql\_slow\_enabled | Flag to enable ProxySQL slow queries monitor | `string` | `"true"` | no | +| proxysql\_slow\_extra\_tags | Extra tags for ProxySQL slow queries monitor | `list(string)` | `[]` | no | +| proxysql\_slow\_message | Custom message for ProxySQL slow queries monitor | `string` | `""` | no | +| proxysql\_slow\_threshold\_critical | Maximum critical acceptable of slow queries | `number` | `20` | no | +| proxysql\_slow\_threshold\_warning | Maximum warning acceptable of slow queries | `number` | `1` | no | +| proxysql\_slow\_time\_aggregator | Monitor time aggregator for ProxySQL slow queries monitor [available values: min, max or avg] | `string` | `"avg"` | no | +| proxysql\_slow\_timeframe | Monitor timeframe for ProxySQL slow queries monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | `string` | `"last_15m"` | no | +| proxysql\_thread\_worker\_enabled | Flag to enable ProxySQL thread worker monitor | `string` | `"true"` | no | +| proxysql\_thread\_worker\_extra\_tags | Extra tags for ProxySQL thread worker monitor | `list(string)` | `[]` | no | +| 
proxysql\_thread\_worker\_message | Custom message for ProxySQL thread worker monitor | `string` | `""` | no | +| proxysql\_thread\_worker\_threshold\_critical | Minimum critical acceptable of thread worker running | `number` | `1` | no | +| proxysql\_thread\_worker\_threshold\_warning | Minimum warning acceptable of thread worker running | `number` | `4` | no | +| proxysql\_thread\_worker\_time\_aggregator | Monitor time aggregator for ProxySQL thread worker monitor [available values: min, max or avg] | `string` | `"avg"` | no | +| proxysql\_thread\_worker\_timeframe | Monitor timeframe for ProxySQL thread worker monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | `string` | `"last_5m"` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| proxysql\_client\_conn\_aborted\_id | id for monitor proxysql\_client\_conn\_aborted | +| proxysql\_pool\_conn\_failure\_id | id for monitor proxysql\_pool\_conn\_failure | +| proxysql\_server\_conn\_aborted\_id | id for monitor proxysql\_server\_conn\_aborted | +| proxysql\_slow\_id | id for monitor proxysql\_slow | +| proxysql\_thread\_worker\_id | id for monitor proxysql\_thread\_worker | + +## Related documentation + +* [Datadog documentation](https://docs.datadoghq.com/integrations/proxysql/) +* [ProxySQL documentation](https://proxysql.com/documentation/) diff --git a/database/proxysql/inputs.tf b/database/proxysql/inputs.tf new file mode 100644 index 0000000..7314488 --- /dev/null +++ b/database/proxysql/inputs.tf @@ -0,0 +1,266 @@ +variable "environment" { + description = "Environment" + type = string +} + +variable "evaluation_delay" { + description = "Delay in seconds for the metric evaluation" + default = 15 +} + +variable "new_host_delay" { + description = "Delay in seconds before a newly booted host is evaluated by the monitors" + default = 300 +} + +variable "prefix_slug" { + description = "Prefix string to prepend between brackets on every monitor name" + default = "" +} + 
+variable "notify_no_data" { + description = "Will raise no data alert if set to true" + default = false +} + +variable "message" { + description = "Message sent when an alert is triggered" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +variable "filter_tags_custom_excluded" { + description = "Tags excluded for custom filtering when filter_tags_use_defaults is false" + default = "" +} + +# ProxySQL specific + +################################## +### ProxySQL thread worker ### +################################## + +variable "proxysql_thread_worker_enabled" { + description = "Flag to enable ProxySQL thread worker monitor" + type = string + default = "true" +} + +variable "proxysql_thread_worker_extra_tags" { + description = "Extra tags for ProxySQL thread worker monitor" + type = list(string) + default = [] +} + +variable "proxysql_thread_worker_message" { + description = "Custom message for ProxySQL thread worker monitor" + type = string + default = "" +} + +variable "proxysql_thread_worker_threshold_critical" { + description = "Minimum critical acceptable of thread worker running" + default = 1 +} + +variable "proxysql_thread_worker_threshold_warning" { + description = "Minimum warning acceptable of thread worker running" + default = 4 +} + +variable "proxysql_thread_worker_time_aggregator" { + description = "Monitor time aggregator for ProxySQL thread worker monitor [available values: min, max or avg]" + type = string + default = "avg" +} + +variable "proxysql_thread_worker_timeframe" { + description = "Monitor timeframe for ProxySQL thread worker monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = string + default = "last_5m" +} + +################################# +### ProxySQL slow queries ### 
+################################# + +variable "proxysql_slow_enabled" { + description = "Flag to enable ProxySQL slow queries monitor" + type = string + default = "true" +} + +variable "proxysql_slow_extra_tags" { + description = "Extra tags for ProxySQL slow queries monitor" + type = list(string) + default = [] +} + +variable "proxysql_slow_message" { + description = "Custom message for ProxySQL slow queries monitor" + type = string + default = "" +} + +variable "proxysql_slow_threshold_critical" { + description = "Maximum critical acceptable of slow queries" + default = 20 +} + +variable "proxysql_slow_threshold_warning" { + description = "Maximum warning acceptable of slow queries" + default = 1 +} + +variable "proxysql_slow_time_aggregator" { + description = "Monitor time aggregator for ProxySQL slow queries monitor [available values: min, max or avg]" + type = string + default = "avg" +} + +variable "proxysql_slow_timeframe" { + description = "Monitor timeframe for ProxySQL slow queries monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = string + default = "last_15m" +} + +########################################## +### ProxySQL Client aborted connects ### +########################################## + +variable "proxysql_client_conn_aborted_enabled" { + description = "Flag to enable ProxySQL client connections aborted monitor" + type = string + default = "true" +} + +variable "proxysql_client_conn_aborted_extra_tags" { + description = "Extra tags for ProxySQL client connections aborted monitor" + type = list(string) + default = [] +} + +variable "proxysql_client_conn_aborted_message" { + description = "Custom message for ProxySQL client connections aborted monitor" + type = string + default = "" +} + +variable "proxysql_client_conn_aborted_threshold_critical" { + description = "Maximum critical acceptable percent of aborted connects" + default = 10 +} + +variable 
"proxysql_client_conn_aborted_threshold_warning" { + description = "Maximum warning acceptable percent of aborted connects" + default = 1 +} + +variable "proxysql_client_conn_aborted_time_aggregator" { + description = "Monitor time aggregator for ProxySQL client connections aborted monitor [available values: min, max or avg]" + type = string + default = "avg" +} + +variable "proxysql_client_conn_aborted_timeframe" { + description = "Monitor timeframe for ProxySQL client connections aborted monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = string + default = "last_10m" +} + +########################################## +### ProxySQL Server aborted connects ### +########################################## + +variable "proxysql_server_conn_aborted_enabled" { + description = "Flag to enable ProxySQL server connections aborted monitor" + type = string + default = "true" +} + +variable "proxysql_server_conn_aborted_extra_tags" { + description = "Extra tags for ProxySQL server connections aborted monitor" + type = list(string) + default = [] +} + +variable "proxysql_server_conn_aborted_message" { + description = "Custom message for ProxySQL server connections aborted monitor" + type = string + default = "" +} + +variable "proxysql_server_conn_aborted_threshold_critical" { + description = "Maximum critical acceptable percent of aborted connects" + default = 10 +} + +variable "proxysql_server_conn_aborted_threshold_warning" { + description = "Maximum warning acceptable percent of aborted connects" + default = 1 +} + +variable "proxysql_server_conn_aborted_time_aggregator" { + description = "Monitor time aggregator for ProxySQL server connections aborted monitor [available values: min, max or avg]" + type = string + default = "avg" +} + +variable "proxysql_server_conn_aborted_timeframe" { + description = "Monitor timeframe for ProxySQL server connections aborted monitor [available values: `last_#m` (1, 5, 10, 15, or 30), 
`last_#h` (1, 2, or 4), or `last_1d`]" + type = string + default = "last_10m" +} + + +########################################## +### ProxySQL Pool Connection Failure ### +########################################## + +variable "proxysql_pool_conn_failure_enabled" { + description = "Flag to enable ProxySQL pool connections failure monitor" + type = string + default = "true" +} + +variable "proxysql_pool_conn_failure_extra_tags" { + description = "Extra tags for ProxySQL pool connections failure monitor" + type = list(string) + default = [] +} + +variable "proxysql_pool_conn_failure_message" { + description = "Custom message for ProxySQL pool connections failure monitor" + type = string + default = "" +} + +variable "proxysql_pool_conn_failure_threshold_critical" { + description = "Maximum critical acceptable of pool connections failure" + default = 20 +} + +variable "proxysql_pool_conn_failure_threshold_warning" { + description = "Maximum warning acceptable of pool connections failure" + default = 1 +} + +variable "proxysql_pool_conn_failure_time_aggregator" { + description = "Monitor time aggregator for ProxySQL pool connections failure monitor [available values: min, max or avg]" + type = string + default = "avg" +} + +variable "proxysql_pool_conn_failure_timeframe" { + description = "Monitor timeframe for ProxySQL pool connections failure monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = string + default = "last_15m" +} diff --git a/database/proxysql/modules.tf b/database/proxysql/modules.tf new file mode 100644 index 0000000..ac04d58 --- /dev/null +++ b/database/proxysql/modules.tf @@ -0,0 +1,10 @@ +module "filter-tags" { + source = "../../common/filter-tags" + + environment = var.environment + resource = "proxysql" + filter_tags_use_defaults = var.filter_tags_use_defaults + filter_tags_custom = var.filter_tags_custom + filter_tags_custom_excluded = var.filter_tags_custom_excluded +} + diff --git 
a/database/proxysql/monitors-proxysql.tf b/database/proxysql/monitors-proxysql.tf new file mode 100644 index 0000000..9ea8914 --- /dev/null +++ b/database/proxysql/monitors-proxysql.tf @@ -0,0 +1,154 @@ +resource "datadog_monitor" "proxysql_thread_worker" { + count = var.proxysql_thread_worker_enabled == "true" ? 1 : 0 + name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] ProxySQL Thread Worker {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = coalesce(var.proxysql_thread_worker_message, var.message) + type = "query alert" + + query = < ${var.proxysql_slow_threshold_critical} +EOQ + + thresholds = { + warning = var.proxysql_slow_threshold_warning + critical = var.proxysql_slow_threshold_critical + } + + evaluation_delay = var.evaluation_delay + new_host_delay = var.new_host_delay + notify_no_data = false + renotify_interval = 0 + require_full_window = true + timeout_h = 0 + include_tags = true + + tags = concat(["env:${var.environment}", "type:database", "provider:proxysql", "resource:proxysql", "team:claranet", "created-by:terraform"], var.proxysql_slow_extra_tags) + + lifecycle { + ignore_changes = [silenced] + } +} + +resource "datadog_monitor" "proxysql_client_conn_aborted" { + count = var.proxysql_client_conn_aborted_enabled == "true" ? 1 : 0 + name = "${var.prefix_slug == "" ? 
"" : "[${var.prefix_slug}]"}[${var.environment}] ProxySQL Client connections aborted {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = coalesce(var.proxysql_client_conn_aborted_message, var.message) + type = "query alert" + + query = < ${var.proxysql_client_conn_aborted_threshold_critical} +EOQ + + thresholds = { + warning = var.proxysql_client_conn_aborted_threshold_warning + critical = var.proxysql_client_conn_aborted_threshold_critical + } + + evaluation_delay = var.evaluation_delay + new_host_delay = var.new_host_delay + notify_no_data = false + renotify_interval = 0 + require_full_window = true + timeout_h = 0 + include_tags = true + + tags = concat(["env:${var.environment}", "type:database", "provider:proxysql", "resource:proxysql", "team:claranet", "created-by:terraform"], var.proxysql_client_conn_aborted_extra_tags) + + lifecycle { + ignore_changes = [silenced] + } +} + +resource "datadog_monitor" "proxysql_server_conn_aborted" { + count = var.proxysql_server_conn_aborted_enabled == "true" ? 1 : 0 + name = "${var.prefix_slug == "" ? 
"" : "[${var.prefix_slug}]"}[${var.environment}] ProxySQL Server connections aborted {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = coalesce(var.proxysql_server_conn_aborted_message, var.message) + type = "query alert" + + query = < ${var.proxysql_server_conn_aborted_threshold_critical} +EOQ + + thresholds = { + warning = var.proxysql_server_conn_aborted_threshold_warning + critical = var.proxysql_server_conn_aborted_threshold_critical + } + + evaluation_delay = var.evaluation_delay + new_host_delay = var.new_host_delay + notify_no_data = false + renotify_interval = 0 + require_full_window = true + timeout_h = 0 + include_tags = true + + tags = concat(["env:${var.environment}", "type:database", "provider:proxysql", "resource:proxysql", "team:claranet", "created-by:terraform"], var.proxysql_server_conn_aborted_extra_tags) + + lifecycle { + ignore_changes = [silenced] + } +} + +resource "datadog_monitor" "proxysql_pool_conn_failure" { + count = var.proxysql_pool_conn_failure_enabled == "true" ? 1 : 0 + name = "${var.prefix_slug == "" ? 
"" : "[${var.prefix_slug}]"}[${var.environment}] ProxySQL Pool connections failure {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = coalesce(var.proxysql_pool_conn_failure_message, var.message) + type = "query alert" + + query = < ${var.proxysql_pool_conn_failure_threshold_critical} +EOQ + + thresholds = { + warning = var.proxysql_pool_conn_failure_threshold_warning + critical = var.proxysql_pool_conn_failure_threshold_critical + } + + evaluation_delay = var.evaluation_delay + new_host_delay = var.new_host_delay + notify_no_data = false + renotify_interval = 0 + require_full_window = true + timeout_h = 0 + include_tags = true + + tags = concat(["env:${var.environment}", "type:database", "provider:proxysql", "resource:proxysql", "team:claranet", "created-by:terraform"], var.proxysql_pool_conn_failure_extra_tags) + + lifecycle { + ignore_changes = [silenced] + } +} diff --git a/database/proxysql/outputs.tf b/database/proxysql/outputs.tf new file mode 100644 index 0000000..665c7d3 --- /dev/null +++ b/database/proxysql/outputs.tf @@ -0,0 +1,25 @@ +output "proxysql_client_conn_aborted_id" { + description = "id for monitor proxysql_client_conn_aborted" + value = datadog_monitor.proxysql_client_conn_aborted.*.id +} + +output "proxysql_pool_conn_failure_id" { + description = "id for monitor proxysql_pool_conn_failure" + value = datadog_monitor.proxysql_pool_conn_failure.*.id +} + +output "proxysql_server_conn_aborted_id" { + description = "id for monitor proxysql_server_conn_aborted" + value = datadog_monitor.proxysql_server_conn_aborted.*.id +} + +output "proxysql_slow_id" { + description = "id for monitor proxysql_slow" + value = datadog_monitor.proxysql_slow.*.id +} + +output "proxysql_thread_worker_id" { + description = "id for monitor proxysql_thread_worker" + value = datadog_monitor.proxysql_thread_worker.*.id +} + diff --git 
a/database/proxysql/versions.tf b/database/proxysql/versions.tf new file mode 100644 index 0000000..b46bd44 --- /dev/null +++ b/database/proxysql/versions.tf @@ -0,0 +1,8 @@ +terraform { + required_providers { + datadog = { + source = "terraform-providers/datadog" + } + } + required_version = ">= 0.12.26" +}