MON-224 Generalize anomaly monitors and standardize variable names.

This commit is contained in:
Rafael Romero Carmona 2018-07-30 12:46:30 +02:00 committed by Quentin Manfroi
parent 50906d0940
commit 3535f294a5
3 changed files with 120 additions and 77 deletions

View File

@ -39,30 +39,36 @@ Creates DataDog monitors with the following checks:
| network_connections_threshold_warning | Number of network connections (warning threshold) | string | `3200` | no |
| network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_15m` | no |
| project_id | ID of the GCP Project | string | - | yes |
| queries_changing_anomaly_alert_window | Value of the `alert_window` parameter passed to the `anomalies()` function of the Queries Changing monitor query | string | `last_30m` | no |
| queries_changing_anomaly_count_default_zero | Value of the `count_default_zero` parameter passed to the `anomalies()` function of the Queries Changing monitor query | string | `false` | no |
| queries_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no |
| queries_changing_deviations | Deviations to detect the anomaly | string | `4` | no |
| queries_changing_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no |
| queries_changing_enabled | Whether or not to create the monitor | string | `true` | no |
| queries_changing_extra_tags | Extra tags for GCP Cloud SQL Queries Changing monitor | list | `<list>` | no |
| queries_changing_message | Custom message for the Queries Changing monitor | string | `` | no |
| queries_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no |
| queries_changing_silenced | Groups to mute for GCP Cloud SQL Queries Changing monitor | map | `<map>` | no |
| queries_changing_threshold_critical | Queries Changing critical threshold | string | `1` | no |
| queries_changing_threshold_critical_recovery | Queries Changing critical recovery threshold | string | `0.99` | no |
| queries_changing_threshold_warning | Queries Changing warning threshold | string | `0.5` | no |
| queries_changing_timeframe | Timeframe for the Queries Changing mon monitor | string | `last_1h` | no |
| queries_changing_anomaly_deviations | Deviations to detect the anomaly | string | `4` | no |
| queries_changing_anomaly_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no |
| queries_changing_anomaly_enabled | Whether or not to create the monitor | string | `true` | no |
| queries_changing_anomaly_extra_tags | Extra tags for GCP Cloud SQL Queries Changing monitor | list | `<list>` | no |
| queries_changing_anomaly_interval | Value of the `interval` parameter passed to the `anomalies()` function of the Queries Changing monitor query | string | `20` | no |
| queries_changing_anomaly_message | Custom message for the Queries Changing monitor | string | `` | no |
| queries_changing_anomaly_seasonality | Seasonality of the algorithm | string | `weekly` | no |
| queries_changing_anomaly_silenced | Groups to mute for GCP Cloud SQL Queries Changing monitor | map | `<map>` | no |
| queries_changing_anomaly_threshold_critical | Queries Changing critical threshold | string | `1` | no |
| queries_changing_anomaly_threshold_critical_recovery | Queries Changing critical recovery threshold | string | `0.99` | no |
| queries_changing_anomaly_threshold_warning | Queries Changing warning threshold | string | `0.5` | no |
| queries_changing_anomaly_timeframe | Timeframe for the Queries Changing monitor | string | `last_1h` | no |
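| questions_changing_anomaly_alert_window | Value of the `alert_window` parameter passed to the `anomalies()` function of the Questions Changing monitor query | string | `last_30m` | no |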
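| questions_changing_anomaly_count_default_zero | Value of the `count_default_zero` parameter passed to the `anomalies()` function of the Questions Changing monitor query | string | `false` | no |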
| questions_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no |
| questions_changing_deviations | Deviations to detect the anomaly | string | `4` | no |
| questions_changing_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no |
| questions_changing_enabled | Whether or not to create the monitor | string | `true` | no |
| questions_changing_extra_tags | Extra tags for GCP Cloud SQL Questions Changing monitor | list | `<list>` | no |
| questions_changing_message | Custom message for the Questions Changing monitor | string | `` | no |
| questions_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no |
| questions_changing_silenced | Groups to mute for GCP Cloud SQL Questions Changing monitor | map | `<map>` | no |
| questions_changing_threshold_critical | Questions Changing critical threshold | string | `1` | no |
| questions_changing_threshold_critical_recovery | Questions Changing critical recovery threshold | string | `0.99` | no |
| questions_changing_threshold_warning | Questions Changing warning threshold | string | `0.5` | no |
| questions_changing_timeframe | Timeframe for the Questions Changing monitor | string | `last_1h` | no |
| questions_changing_anomaly_deviations | Deviations to detect the anomaly | string | `4` | no |
| questions_changing_anomaly_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no |
| questions_changing_anomaly_enabled | Whether or not to create the monitor | string | `true` | no |
| questions_changing_anomaly_extra_tags | Extra tags for GCP Cloud SQL Questions Changing monitor | list | `<list>` | no |
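| questions_changing_anomaly_interval | Value of the `interval` parameter passed to the `anomalies()` function of the Questions Changing monitor query | string | `20` | no |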
| questions_changing_anomaly_message | Custom message for the Questions Changing monitor | string | `` | no |
| questions_changing_anomaly_seasonality | Seasonality of the algorithm | string | `weekly` | no |
| questions_changing_anomaly_silenced | Groups to mute for GCP Cloud SQL Questions Changing monitor | map | `<map>` | no |
| questions_changing_anomaly_threshold_critical | Questions Changing critical threshold | string | `1` | no |
| questions_changing_anomaly_threshold_critical_recovery | Questions Changing critical recovery threshold | string | `0.99` | no |
| questions_changing_anomaly_threshold_warning | Questions Changing warning threshold | string | `0.5` | no |
| questions_changing_anomaly_timeframe | Timeframe for the Questions Changing monitor | string | `last_1h` | no |
| replication_lag_enabled | Whether or not to create the monitor | string | `true` | no |
| replication_lag_extra_tags | Extra tags for GCP Cloud SQL Replication Lag monitor | list | `<list>` | no |
| replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no |

View File

@ -132,20 +132,19 @@ variable "replication_lag_extra_tags" {
#
# Queries Changing Abnormally
#
variable "queries_changing_enabled" {
variable "queries_changing_anomaly_enabled" {
description = "Whether or not to create the monitor"
type = "string"
default = "true"
}
variable "queries_changing_message" {
variable "queries_changing_anomaly_message" {
description = "Custom message for the Queries Changing monitor"
type = "string"
default = ""
}
variable "queries_changing_timeframe" {
variable "queries_changing_anomaly_timeframe" {
description = "Timeframe for the Queries Changing mon monitor"
type = "string"
default = "last_1h"
@ -157,49 +156,67 @@ variable "queries_changing_anomaly_detection_algorithm" {
default = "agile"
}
variable "queries_changing_deviations" {
variable "queries_changing_anomaly_deviations" {
description = "Deviations to detect the anomaly"
type = "string"
default = 4
}
variable "queries_changing_direction" {
variable "queries_changing_anomaly_direction" {
description = "Direction of the anomaly. It can be both, below or above."
type = "string"
default = "both"
}
variable "queries_changing_seasonality" {
variable "queries_changing_anomaly_alert_window" {
description = "Alert window."
type = "string"
default = "last_30m"
}
variable "queries_changing_anomaly_interval" {
description = "Interval."
type = "string"
default = 20
}
variable "queries_changing_anomaly_count_default_zero" {
description = "Count default zero."
type = "string"
default = "false"
}
variable "queries_changing_anomaly_seasonality" {
description = "Seasonality of the algorithm"
type = "string"
default = "weekly"
}
variable "queries_changing_threshold_warning" {
variable "queries_changing_anomaly_threshold_warning" {
description = "Queries Changing warning threshold"
type = "string"
default = 0.5
}
variable "queries_changing_threshold_critical" {
variable "queries_changing_anomaly_threshold_critical" {
description = "Queries Changing critical threshold"
type = "string"
default = 1
}
variable "queries_changing_threshold_critical_recovery" {
variable "queries_changing_anomaly_threshold_critical_recovery" {
description = "Queries Changing critical recovery threshold"
type = "string"
default = 0.99
}
variable "queries_changing_silenced" {
variable "queries_changing_anomaly_silenced" {
description = "Groups to mute for GCP Cloud SQL Queries Changing monitor"
type = "map"
default = {}
}
variable "queries_changing_extra_tags" {
variable "queries_changing_anomaly_extra_tags" {
description = "Extra tags for GCP Cloud SQL Queries Changing monitor"
type = "list"
default = []
@ -208,19 +225,19 @@ variable "queries_changing_extra_tags" {
#
# Questions Changing
#
variable "questions_changing_enabled" {
variable "questions_changing_anomaly_enabled" {
description = "Whether or not to create the monitor"
type = "string"
default = "true"
}
variable "questions_changing_message" {
variable "questions_changing_anomaly_message" {
description = "Custom message for the Questions Changing monitor"
type = "string"
default = ""
}
variable "questions_changing_timeframe" {
variable "questions_changing_anomaly_timeframe" {
description = "Timeframe for the Questions Changing monitor"
type = "string"
default = "last_1h"
@ -232,49 +249,67 @@ variable "questions_changing_anomaly_detection_algorithm" {
default = "agile"
}
variable "questions_changing_deviations" {
variable "questions_changing_anomaly_alert_window" {
description = "Alert window."
type = "string"
default = "last_30m"
}
variable "questions_changing_anomaly_interval" {
description = "Interval."
type = "string"
default = 20
}
variable "questions_changing_anomaly_count_default_zero" {
description = "Count default zero."
type = "string"
default = "false"
}
variable "questions_changing_anomaly_deviations" {
description = "Deviations to detect the anomaly"
type = "string"
default = 4
}
variable "questions_changing_direction" {
variable "questions_changing_anomaly_direction" {
description = "Direction of the anomaly. It can be both, below or above."
type = "string"
default = "both"
}
variable "questions_changing_seasonality" {
variable "questions_changing_anomaly_seasonality" {
description = "Seasonality of the algorithm"
type = "string"
default = "weekly"
}
variable "questions_changing_threshold_warning" {
variable "questions_changing_anomaly_threshold_warning" {
description = "Questions Changing warning threshold"
type = "string"
default = 0.5
}
variable "questions_changing_threshold_critical" {
variable "questions_changing_anomaly_threshold_critical" {
description = "Questions Changing critical threshold"
type = "string"
default = 1
}
variable "questions_changing_threshold_critical_recovery" {
variable "questions_changing_anomaly_threshold_critical_recovery" {
description = "Questions Changing critical recovery threshold"
type = "string"
default = 0.99
}
variable "questions_changing_silenced" {
variable "questions_changing_anomaly_silenced" {
description = "Groups to mute for GCP Cloud SQL Questions Changing monitor"
type = "map"
default = {}
}
variable "questions_changing_extra_tags" {
variable "questions_changing_anomaly_extra_tags" {
description = "Extra tags for GCP Cloud SQL Questions Changing monitor"
type = "list"
default = []

View File

@ -41,9 +41,11 @@ EOF
timeout_h = 0
include_tags = true
locked = false
evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}"
silenced = "${var.network_connections_silenced}"
evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}"
silenced = "${var.network_connections_silenced}"
tags = [
"team:gcp",
@ -108,32 +110,32 @@ EOF
# Queries Anomaly
#
resource "datadog_monitor" "queries_changing_anomaly" {
count = "${var.queries_changing_enabled} == true ? 1 : 0 "
count = "${var.queries_changing_anomaly_enabled} == true ? 1 : 0 "
name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally"
message = "${coalesce(var.queries_changing_message, var.message)}"
message = "${coalesce(var.queries_changing_anomaly_message, var.message)}"
type = "query alert"
query = <<EOF
avg(${var.queries_changing_timeframe}):
avg(${var.queries_changing_anomaly_timeframe}):
anomalies(
avg:gcp.cloudsql.database.mysql.queries{${data.template_file.filter.rendered}} by {database_id}.as_count()
'${var.queries_changing_anomaly_detection_algorithm}',
${var.queries_changing_deviations},
direction='${var.queries_changing_direction}',
alert_window='last_30m',
interval=20,
count_default_zero='false',
seasonality='${var.queries_changing_seasonality}'
${var.queries_changing_anomaly_deviations},
direction='${var.queries_changing_anomaly_direction}',
alert_window='${var.queries_changing_anomaly_alert_window}',
interval=${var.queries_changing_anomaly_interval},
count_default_zero='${var.queries_changing_anomaly_count_default_zero}',
seasonality='${var.queries_changing_anomaly_seasonality}'
)
> ${var.queries_changing_threshold_critical}
> ${var.queries_changing_anomaly_threshold_critical}
EOF
thresholds {
warning = "${var.queries_changing_threshold_warning}"
critical = "${var.queries_changing_threshold_critical}"
critical_recovery = "${var.queries_changing_threshold_critical_recovery}"
warning = "${var.queries_changing_anomaly_threshold_warning}"
critical = "${var.queries_changing_anomaly_threshold_critical}"
critical_recovery = "${var.queries_changing_anomaly_threshold_critical_recovery}"
}
notify_audit = false
@ -147,7 +149,7 @@ EOF
evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}"
silenced = "${var.queries_changing_silenced}"
silenced = "${var.queries_changing_anomaly_silenced}"
tags = [
"team:gcp",
@ -156,7 +158,7 @@ EOF
"created_by:terraform",
"resource:cloud-sql",
"engine:mysql",
"${var.queries_changing_extra_tags}",
"${var.queries_changing_anomaly_extra_tags}",
]
}
@ -164,10 +166,10 @@ EOF
# Questions Anomaly
#
resource "datadog_monitor" "questions_changing_anomaly" {
count = "${var.questions_changing_enabled} == true ? 1 : 0 "
count = "${var.questions_changing_anomaly_enabled} == true ? 1 : 0 "
name = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally"
message = "${coalesce(var.questions_changing_message, var.message)}"
message = "${coalesce(var.questions_changing_anomaly_message, var.message)}"
type = "query alert"
@ -176,20 +178,20 @@ resource "datadog_monitor" "questions_changing_anomaly" {
anomalies(
avg:gcp.cloudsql.database.mysql.questions{${data.template_file.filter.rendered}} by {database_id},
'${var.questions_changing_anomaly_detection_algorithm}',
${var.questions_changing_deviations},
direction='${var.questions_changing_direction}',
alert_window='last_30m',
interval=20,
count_default_zero='false',
seasonality='${var.questions_changing_seasonality}'
${var.questions_changing_anomaly_deviations},
direction='${var.questions_changing_anomaly_direction}',
alert_window='${var.questions_changing_anomaly_alert_window}',
interval=${var.questions_changing_anomaly_interval},
count_default_zero='${var.questions_changing_anomaly_count_default_zero}',
seasonality='${var.questions_changing_anomaly_seasonality}'
)
> ${var.questions_changing_threshold_critical}
> ${var.questions_changing_anomaly_threshold_critical}
EOF
thresholds {
warning = "${var.questions_changing_threshold_warning}"
critical = "${var.questions_changing_threshold_critical}"
critical_recovery = "${var.questions_changing_threshold_critical_recovery}"
warning = "${var.questions_changing_anomaly_threshold_warning}"
critical = "${var.questions_changing_anomaly_threshold_critical}"
critical_recovery = "${var.questions_changing_anomaly_threshold_critical_recovery}"
}
notify_audit = false
@ -203,7 +205,7 @@ EOF
evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}"
silenced = "${var.questions_changing_silenced}"
silenced = "${var.questions_changing_anomaly_silenced}"
tags = [
"team:gcp",
@ -212,6 +214,6 @@ EOF
"created_by:terraform",
"resource:cloud-sql",
"engine:mysql",
"${var.questions_changing_extra_tags}",
"${var.questions_changing_anomaly_extra_tags}",
]
}