MON-224 Generalize anomaly monitors and standardize variable names.

This commit is contained in:
Rafael Romero Carmona 2018-07-30 12:46:30 +02:00 committed by Quentin Manfroi
parent 50906d0940
commit 3535f294a5
3 changed files with 120 additions and 77 deletions

View File

@ -39,30 +39,36 @@ Creates DataDog monitors with the following checks:
| network_connections_threshold_warning | Number of network connections (warning threshold) | string | `3200` | no | | network_connections_threshold_warning | Number of network connections (warning threshold) | string | `3200` | no |
| network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_15m` | no | | network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_15m` | no |
| project_id | ID of the GCP Project | string | - | yes | | project_id | ID of the GCP Project | string | - | yes |
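| queries_changing_anomaly_alert_window | Value of the `alert_window` parameter of the Datadog `anomalies()` function: the trailing period checked for anomalies | string | `last_30m` | no |
| queries_changing_anomaly_count_default_zero | Value of the `count_default_zero` parameter of the Datadog `anomalies()` function: whether missing values of a count metric are treated as zero | string | `false` | no |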
| queries_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no | | queries_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no |
| queries_changing_deviations | Deviations to detect the anomaly | string | `4` | no | | queries_changing_anomaly_deviations | Deviations to detect the anomaly | string | `4` | no |
| queries_changing_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no | | queries_changing_anomaly_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no |
| queries_changing_enabled | Whether or not to create the monitor | string | `true` | no | | queries_changing_anomaly_enabled | Whether or not to create the monitor | string | `true` | no |
| queries_changing_extra_tags | Extra tags for GCP Cloud SQL Queries Changing monitor | list | `<list>` | no | | queries_changing_anomaly_extra_tags | Extra tags for GCP Cloud SQL Queries Changing monitor | list | `<list>` | no |
| queries_changing_message | Custom message for the Queries Changing monitor | string | `` | no | | queries_changing_anomaly_interval | Value of the `interval` parameter of the Datadog `anomalies()` function: the rollup interval in seconds | string | `20` | no |
| queries_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | | queries_changing_anomaly_message | Custom message for the Queries Changing monitor | string | `` | no |
| queries_changing_silenced | Groups to mute for GCP Cloud SQL Queries Changing monitor | map | `<map>` | no | | queries_changing_anomaly_seasonality | Seasonality of the algorithm | string | `weekly` | no |
| queries_changing_threshold_critical | Queries Changing critical threshold | string | `1` | no | | queries_changing_anomaly_silenced | Groups to mute for GCP Cloud SQL Queries Changing monitor | map | `<map>` | no |
| queries_changing_threshold_critical_recovery | Queries Changing critical recovery threshold | string | `0.99` | no | | queries_changing_anomaly_threshold_critical | Queries Changing critical threshold | string | `1` | no |
| queries_changing_threshold_warning | Queries Changing warning threshold | string | `0.5` | no | | queries_changing_anomaly_threshold_critical_recovery | Queries Changing critical recovery threshold | string | `0.99` | no |
| queries_changing_timeframe | Timeframe for the Queries Changing monitor | string | `last_1h` | no | | queries_changing_anomaly_threshold_warning | Queries Changing warning threshold | string | `0.5` | no |
| queries_changing_anomaly_timeframe | Timeframe for the Queries Changing monitor | string | `last_1h` | no |
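| questions_changing_anomaly_alert_window | Value of the `alert_window` parameter of the Datadog `anomalies()` function: the trailing period checked for anomalies | string | `last_30m` | no |
| questions_changing_anomaly_count_default_zero | Value of the `count_default_zero` parameter of the Datadog `anomalies()` function: whether missing values of a count metric are treated as zero | string | `false` | no |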
| questions_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no | | questions_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no |
| questions_changing_deviations | Deviations to detect the anomaly | string | `4` | no | | questions_changing_anomaly_deviations | Deviations to detect the anomaly | string | `4` | no |
| questions_changing_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no | | questions_changing_anomaly_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no |
| questions_changing_enabled | Whether or not to create the monitor | string | `true` | no | | questions_changing_anomaly_enabled | Whether or not to create the monitor | string | `true` | no |
| questions_changing_extra_tags | Extra tags for GCP Cloud SQL Questions Changing monitor | list | `<list>` | no | | questions_changing_anomaly_extra_tags | Extra tags for GCP Cloud SQL Questions Changing monitor | list | `<list>` | no |
| questions_changing_message | Custom message for the Questions Changing monitor | string | `` | no | | questions_changing_anomaly_interval | Value of the `interval` parameter of the Datadog `anomalies()` function: the rollup interval in seconds | string | `20` | no |
| questions_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | | questions_changing_anomaly_message | Custom message for the Questions Changing monitor | string | `` | no |
| questions_changing_silenced | Groups to mute for GCP Cloud SQL Questions Changing monitor | map | `<map>` | no | | questions_changing_anomaly_seasonality | Seasonality of the algorithm | string | `weekly` | no |
| questions_changing_threshold_critical | Questions Changing critical threshold | string | `1` | no | | questions_changing_anomaly_silenced | Groups to mute for GCP Cloud SQL Questions Changing monitor | map | `<map>` | no |
| questions_changing_threshold_critical_recovery | Questions Changing critical recovery threshold | string | `0.99` | no | | questions_changing_anomaly_threshold_critical | Questions Changing critical threshold | string | `1` | no |
| questions_changing_threshold_warning | Questions Changing warning threshold | string | `0.5` | no | | questions_changing_anomaly_threshold_critical_recovery | Questions Changing critical recovery threshold | string | `0.99` | no |
| questions_changing_timeframe | Timeframe for the Questions Changing monitor | string | `last_1h` | no | | questions_changing_anomaly_threshold_warning | Questions Changing warning threshold | string | `0.5` | no |
| questions_changing_anomaly_timeframe | Timeframe for the Questions Changing monitor | string | `last_1h` | no |
| replication_lag_enabled | Whether or not to create the monitor | string | `true` | no | | replication_lag_enabled | Whether or not to create the monitor | string | `true` | no |
| replication_lag_extra_tags | Extra tags for GCP Cloud SQL SQL Replication monitor | list | `<list>` | no | | replication_lag_extra_tags | Extra tags for GCP Cloud SQL SQL Replication monitor | list | `<list>` | no |
| replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no | | replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no |

View File

@ -132,20 +132,19 @@ variable "replication_lag_extra_tags" {
# #
# Queries Changing Abnormally # Queries Changing Abnormally
# #
variable "queries_changing_enabled" { variable "queries_changing_anomaly_enabled" {
description = "Whether or not to create the monitor" description = "Whether or not to create the monitor"
type = "string" type = "string"
default = "true" default = "true"
} }
variable "queries_changing_anomaly_message" {
variable "queries_changing_message" {
description = "Custom message for the Queries Changing monitor" description = "Custom message for the Queries Changing monitor"
type = "string" type = "string"
default = "" default = ""
} }
variable "queries_changing_timeframe" { variable "queries_changing_anomaly_timeframe" {
description = "Timeframe for the Queries Changing mon monitor" description = "Timeframe for the Queries Changing mon monitor"
type = "string" type = "string"
default = "last_1h" default = "last_1h"
@ -157,49 +156,67 @@ variable "queries_changing_anomaly_detection_algorithm" {
default = "agile" default = "agile"
} }
variable "queries_changing_deviations" { variable "queries_changing_anomaly_deviations" {
description = "Deviations to detect the anomaly" description = "Deviations to detect the anomaly"
type = "string" type = "string"
default = 4 default = 4
} }
variable "queries_changing_direction" { variable "queries_changing_anomaly_direction" {
description = "Direction of the anomaly. It can be both, below or above." description = "Direction of the anomaly. It can be both, below or above."
type = "string" type = "string"
default = "both" default = "both"
} }
variable "queries_changing_seasonality" { variable "queries_changing_anomaly_alert_window" {
description = "Alert window."
type = "string"
default = "last_30m"
}
variable "queries_changing_anomaly_interval" {
description = "Interval."
type = "string"
default = 20
}
variable "queries_changing_anomaly_count_default_zero" {
description = "Count default zero."
type = "string"
default = "false"
}
variable "queries_changing_anomaly_seasonality" {
description = "Seasonality of the algorithm" description = "Seasonality of the algorithm"
type = "string" type = "string"
default = "weekly" default = "weekly"
} }
variable "queries_changing_threshold_warning" { variable "queries_changing_anomaly_threshold_warning" {
description = "Queries Changing warning threshold" description = "Queries Changing warning threshold"
type = "string" type = "string"
default = 0.5 default = 0.5
} }
variable "queries_changing_threshold_critical" { variable "queries_changing_anomaly_threshold_critical" {
description = "Queries Changing critical threshold" description = "Queries Changing critical threshold"
type = "string" type = "string"
default = 1 default = 1
} }
variable "queries_changing_threshold_critical_recovery" { variable "queries_changing_anomaly_threshold_critical_recovery" {
description = "Queries Changing critical recovery threshold" description = "Queries Changing critical recovery threshold"
type = "string" type = "string"
default = 0.99 default = 0.99
} }
variable "queries_changing_silenced" { variable "queries_changing_anomaly_silenced" {
description = "Groups to mute for GCP Cloud SQL Queries Changing monitor" description = "Groups to mute for GCP Cloud SQL Queries Changing monitor"
type = "map" type = "map"
default = {} default = {}
} }
variable "queries_changing_extra_tags" { variable "queries_changing_anomaly_extra_tags" {
description = "Extra tags for GCP Cloud SQL Queries Changing monitor" description = "Extra tags for GCP Cloud SQL Queries Changing monitor"
type = "list" type = "list"
default = [] default = []
@ -208,19 +225,19 @@ variable "queries_changing_extra_tags" {
# #
# Questions Changing # Questions Changing
# #
variable "questions_changing_enabled" { variable "questions_changing_anomaly_enabled" {
description = "Whether or not to create the monitor" description = "Whether or not to create the monitor"
type = "string" type = "string"
default = "true" default = "true"
} }
variable "questions_changing_message" { variable "questions_changing_anomaly_message" {
description = "Custom message for the Questions Changing monitor" description = "Custom message for the Questions Changing monitor"
type = "string" type = "string"
default = "" default = ""
} }
variable "questions_changing_timeframe" { variable "questions_changing_anomaly_timeframe" {
description = "Timeframe for the Questions Changing monitor" description = "Timeframe for the Questions Changing monitor"
type = "string" type = "string"
default = "last_1h" default = "last_1h"
@ -232,49 +249,67 @@ variable "questions_changing_anomaly_detection_algorithm" {
default = "agile" default = "agile"
} }
variable "questions_changing_deviations" { variable "questions_changing_anomaly_alert_window" {
description = "Alert window."
type = "string"
default = "last_30m"
}
variable "questions_changing_anomaly_interval" {
description = "Interval."
type = "string"
default = 20
}
variable "questions_changing_anomaly_count_default_zero" {
description = "Count default zero."
type = "string"
default = "false"
}
variable "questions_changing_anomaly_deviations" {
description = "Deviations to detect the anomaly" description = "Deviations to detect the anomaly"
type = "string" type = "string"
default = 4 default = 4
} }
variable "questions_changing_direction" { variable "questions_changing_anomaly_direction" {
description = "Direction of the anomaly. It can be both, below or above." description = "Direction of the anomaly. It can be both, below or above."
type = "string" type = "string"
default = "both" default = "both"
} }
variable "questions_changing_seasonality" { variable "questions_changing_anomaly_seasonality" {
description = "Seasonality of the algorithm" description = "Seasonality of the algorithm"
type = "string" type = "string"
default = "weekly" default = "weekly"
} }
variable "questions_changing_threshold_warning" { variable "questions_changing_anomaly_threshold_warning" {
description = "Questions Changing warning threshold" description = "Questions Changing warning threshold"
type = "string" type = "string"
default = 0.5 default = 0.5
} }
variable "questions_changing_threshold_critical" { variable "questions_changing_anomaly_threshold_critical" {
description = "Questions Changing critical threshold" description = "Questions Changing critical threshold"
type = "string" type = "string"
default = 1 default = 1
} }
variable "questions_changing_threshold_critical_recovery" { variable "questions_changing_anomaly_threshold_critical_recovery" {
description = "Questions Changing critical recovery threshold" description = "Questions Changing critical recovery threshold"
type = "string" type = "string"
default = 0.99 default = 0.99
} }
variable "questions_changing_silenced" { variable "questions_changing_anomaly_silenced" {
description = "Groups to mute for GCP Cloud SQL Questions Changing monitor" description = "Groups to mute for GCP Cloud SQL Questions Changing monitor"
type = "map" type = "map"
default = {} default = {}
} }
variable "questions_changing_extra_tags" { variable "questions_changing_anomaly_extra_tags" {
description = "Extra tags for GCP Cloud SQL Questions Changing monitor" description = "Extra tags for GCP Cloud SQL Questions Changing monitor"
type = "list" type = "list"
default = [] default = []

View File

@ -41,9 +41,11 @@ EOF
timeout_h = 0 timeout_h = 0
include_tags = true include_tags = true
locked = false locked = false
evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}" evaluation_delay = "${var.delay}"
silenced = "${var.network_connections_silenced}" new_host_delay = "${var.delay}"
silenced = "${var.network_connections_silenced}"
tags = [ tags = [
"team:gcp", "team:gcp",
@ -108,32 +110,32 @@ EOF
# Queries Anomaly # Queries Anomaly
# #
resource "datadog_monitor" "queries_changing_anomaly" { resource "datadog_monitor" "queries_changing_anomaly" {
count = "${var.queries_changing_enabled} == true ? 1 : 0 " count = "${var.queries_changing_anomaly_enabled} == true ? 1 : 0 "
name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally" name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally"
message = "${coalesce(var.queries_changing_message, var.message)}" message = "${coalesce(var.queries_changing_anomaly_message, var.message)}"
type = "query alert" type = "query alert"
query = <<EOF query = <<EOF
avg(${var.queries_changing_timeframe}): avg(${var.queries_changing_anomaly_timeframe}):
anomalies( anomalies(
avg:gcp.cloudsql.database.mysql.queries{${data.template_file.filter.rendered}} by {database_id}.as_count() avg:gcp.cloudsql.database.mysql.queries{${data.template_file.filter.rendered}} by {database_id}.as_count()
'${var.queries_changing_anomaly_detection_algorithm}', '${var.queries_changing_anomaly_detection_algorithm}',
${var.queries_changing_deviations}, ${var.queries_changing_anomaly_deviations},
direction='${var.queries_changing_direction}', direction='${var.queries_changing_anomaly_direction}',
alert_window='last_30m', alert_window='${var.queries_changing_anomaly_alert_window}',
interval=20, interval=${var.queries_changing_anomaly_interval},
count_default_zero='false', count_default_zero='${var.queries_changing_anomaly_count_default_zero}',
seasonality='${var.queries_changing_seasonality}' seasonality='${var.queries_changing_anomaly_seasonality}'
) )
> ${var.queries_changing_threshold_critical} > ${var.queries_changing_anomaly_threshold_critical}
EOF EOF
thresholds { thresholds {
warning = "${var.queries_changing_threshold_warning}" warning = "${var.queries_changing_anomaly_threshold_warning}"
critical = "${var.queries_changing_threshold_critical}" critical = "${var.queries_changing_anomaly_threshold_critical}"
critical_recovery = "${var.queries_changing_threshold_critical_recovery}" critical_recovery = "${var.queries_changing_anomaly_threshold_critical_recovery}"
} }
notify_audit = false notify_audit = false
@ -147,7 +149,7 @@ EOF
evaluation_delay = "${var.delay}" evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
silenced = "${var.queries_changing_silenced}" silenced = "${var.queries_changing_anomaly_silenced}"
tags = [ tags = [
"team:gcp", "team:gcp",
@ -156,7 +158,7 @@ EOF
"created_by:terraform", "created_by:terraform",
"resource:cloud-sql", "resource:cloud-sql",
"engine:mysql", "engine:mysql",
"${var.queries_changing_extra_tags}", "${var.queries_changing_anomaly_extra_tags}",
] ]
} }
@ -164,10 +166,10 @@ EOF
# Questions Anomaly # Questions Anomaly
# #
resource "datadog_monitor" "questions_changing_anomaly" { resource "datadog_monitor" "questions_changing_anomaly" {
count = "${var.questions_changing_enabled} == true ? 1 : 0 " count = "${var.questions_changing_anomaly_enabled} == true ? 1 : 0 "
name = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally" name = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally"
message = "${coalesce(var.questions_changing_message, var.message)}" message = "${coalesce(var.questions_changing_anomaly_message, var.message)}"
type = "query alert" type = "query alert"
@ -176,20 +178,20 @@ resource "datadog_monitor" "questions_changing_anomaly" {
anomalies( anomalies(
avg:gcp.cloudsql.database.mysql.questions{${data.template_file.filter.rendered}} by {database_id}, avg:gcp.cloudsql.database.mysql.questions{${data.template_file.filter.rendered}} by {database_id},
'${var.questions_changing_anomaly_detection_algorithm}', '${var.questions_changing_anomaly_detection_algorithm}',
${var.questions_changing_deviations}, ${var.questions_changing_anomaly_deviations},
direction='${var.questions_changing_direction}', direction='${var.questions_changing_anomaly_direction}',
alert_window='last_30m', alert_window='${var.questions_changing_anomaly_alert_window}',
interval=20, interval=${var.questions_changing_anomaly_interval},
count_default_zero='false', count_default_zero='${var.questions_changing_anomaly_count_default_zero}',
seasonality='${var.questions_changing_seasonality}' seasonality='${var.questions_changing_anomaly_seasonality}'
) )
> ${var.questions_changing_threshold_critical} > ${var.questions_changing_anomaly_threshold_critical}
EOF EOF
thresholds { thresholds {
warning = "${var.questions_changing_threshold_warning}" warning = "${var.questions_changing_anomaly_threshold_warning}"
critical = "${var.questions_changing_threshold_critical}" critical = "${var.questions_changing_anomaly_threshold_critical}"
critical_recovery = "${var.questions_changing_threshold_critical_recovery}" critical_recovery = "${var.questions_changing_anomaly_threshold_critical_recovery}"
} }
notify_audit = false notify_audit = false
@ -203,7 +205,7 @@ EOF
evaluation_delay = "${var.delay}" evaluation_delay = "${var.delay}"
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
silenced = "${var.questions_changing_silenced}" silenced = "${var.questions_changing_anomaly_silenced}"
tags = [ tags = [
"team:gcp", "team:gcp",
@ -212,6 +214,6 @@ EOF
"created_by:terraform", "created_by:terraform",
"resource:cloud-sql", "resource:cloud-sql",
"engine:mysql", "engine:mysql",
"${var.questions_changing_extra_tags}", "${var.questions_changing_anomaly_extra_tags}",
] ]
} }