MON-224 CloudSQL MySQL Monitors for Queries and Questions Anomalies
This commit is contained in:
parent
cbc06ae0dc
commit
9233464195
@ -30,6 +30,7 @@ Useful links
|
||||
* [Datadog Useful monitors for GCP CloudSQL](https://www.datadoghq.com/blog/monitor-google-cloud-sql/)
|
||||
* [Max connections depends on the type of the instance](https://cloud.google.com/sql/docs/quotas#fixed-limits)
|
||||
* [Monitoring Replication Lag](https://cloud.google.com/sql/docs/mysql/high-availability#replication-lag-monitor)
|
||||
* [Monitoring MySQL Performance Metrics](https://www.datadoghq.com/blog/monitoring-mysql-performance-metrics)
|
||||
|
||||
Inputs
|
||||
------
|
||||
@ -48,6 +49,28 @@ Inputs
|
||||
| network_connections_threshold_warning | Number of network connections (warning threshold) | string | `3200` | no |
|
||||
| network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_5m` | no |
|
||||
| project_id | ID of the GCP Project | string | - | yes |
|
||||
| queries_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `robust` | no |
|
||||
| queries_changing_database_ids | List of Cloud SQL database IDs to watch; one Queries Changing monitor is created per entry | list | `<list>` | no |
|
||||
| queries_changing_deviations | Deviations to detect the anomaly | string | `4` | no |
|
||||
| queries_changing_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no |
|
||||
| queries_changing_message | Custom message for the Queries Changing monitor | string | `` | no |
|
||||
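| queries_changing_region | Region of the databases, used to build the `project_id:region:database_id` filter of the Queries Changing monitor | string | `` | no |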
|
||||
| queries_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no |
|
||||
| queries_changing_silenced | Groups to mute for GCP Cloud SQL Queries Changing monitor | map | `<map>` | no |
|
||||
| queries_changing_threshold_critical | Queries Changing critical threshold | string | `1` | no |
|
||||
| queries_changing_threshold_warning | Queries Changing warning threshold | string | `0.5` | no |
|
||||
| queries_changing_timeframe | Timeframe for the Queries Changing monitor | string | `last_10m` | no |
|
||||
| questions_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `robust` | no |
|
||||
| questions_changing_database_ids | List of Cloud SQL database IDs to watch; one Questions Changing monitor is created per entry | list | `<list>` | no |
|
||||
| questions_changing_deviations | Deviations to detect the anomaly | string | `4` | no |
|
||||
| questions_changing_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no |
|
||||
| questions_changing_message | Custom message for the Questions Changing monitor | string | `` | no |
|
||||
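| questions_changing_region | Region of the databases, used to build the `project_id:region:database_id` filter of the Questions Changing monitor | string | `` | no |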
|
||||
| questions_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no |
|
||||
| questions_changing_silenced | Groups to mute for GCP Cloud SQL Questions Changing monitor | map | `<map>` | no |
|
||||
| questions_changing_threshold_critical | Questions Changing critical threshold | string | `1` | no |
|
||||
| questions_changing_threshold_warning | Questions Changing warning threshold | string | `0.5` | no |
|
||||
| questions_changing_timeframe | Timeframe for the Questions Changing monitor | string | `last_10m` | no |
|
||||
| replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no |
|
||||
| replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `<map>` | no |
|
||||
| replication_lag_threshold_critical | Seconds behind the master (critical threshold) | string | `2700` | no |
|
||||
|
||||
@ -104,3 +104,141 @@ variable "replication_lag_silenced" {
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
#
|
||||
# Queries Changing Abnormally
|
||||
#
|
||||
# Database IDs covered by the Queries Changing monitor. The monitor resource
# uses length() of this list as its count, so an empty list creates no monitor.
variable "queries_changing_database_ids" {
  description = "List of Cloud SQL database IDs to watch; one Queries Changing monitor is created per entry"
  type        = "list"
  default     = []
}
|
||||
|
||||
# Middle segment of the database_id filter/tag, which the monitor builds as
# "<project_id>:<queries_changing_region>:<database id>".
variable "queries_changing_region" {
  description = "Region of the databases, used to build the project_id:region:database_id filter of the Queries Changing monitor"
  type        = "string"
  default     = ""
}
|
||||
|
||||
# Notification text specific to the Queries Changing monitor. The monitor reads
# it through coalesce(var.queries_changing_message, var.message).
variable "queries_changing_message" {
  type        = "string"
  default     = ""
  description = "Custom message for the Queries Changing monitor"
}
|
||||
|
||||
# Evaluation window of the Queries Changing monitor; interpolated into the
# query as "avg(<timeframe>):".
variable "queries_changing_timeframe" {
  description = "Timeframe for the Queries Changing monitor"
  type        = "string"
  default     = "last_10m"
}
|
||||
|
||||
# Algorithm name passed (quoted) as the second argument of anomalies() in the
# Queries Changing monitor query.
variable "queries_changing_anomaly_detection_algorithm" {
  type        = "string"
  default     = "robust"
  description = "Anomaly Detection Algorithm used"
}
|
||||
|
||||
# Deviation count passed as the third argument of anomalies() in the
# Queries Changing monitor query.
variable "queries_changing_deviations" {
  type        = "string"
  default     = 4
  description = "Deviations to detect the anomaly"
}
|
||||
|
||||
# Value of the direction='...' argument of anomalies() in the Queries Changing
# monitor query.
variable "queries_changing_direction" {
  type        = "string"
  default     = "both"
  description = "Direction of the anomaly. It can be both, below or above."
}
|
||||
|
||||
# Value of the seasonality='...' argument of anomalies() in the Queries
# Changing monitor query.
variable "queries_changing_seasonality" {
  type        = "string"
  default     = "weekly"
  description = "Seasonality of the algorithm"
}
|
||||
|
||||
# Feeds thresholds.warning of the Queries Changing monitor.
variable "queries_changing_threshold_warning" {
  type        = "string"
  default     = 0.5
  description = "Queries Changing warning threshold"
}
|
||||
|
||||
# Feeds both thresholds.critical and the comparison at the end of the
# Queries Changing monitor query.
variable "queries_changing_threshold_critical" {
  type        = "string"
  default     = 1
  description = "Queries Changing critical threshold"
}
|
||||
|
||||
# Map handed unchanged to the "silenced" attribute of the Queries Changing
# monitor.
variable "queries_changing_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for GCP Cloud SQL Queries Changing monitor"
}
|
||||
|
||||
#
|
||||
# Questions Changing
|
||||
#
|
||||
# Notification text specific to the Questions Changing monitor. The monitor
# reads it through coalesce(var.questions_changing_message, var.message).
variable "questions_changing_message" {
  type        = "string"
  default     = ""
  description = "Custom message for the Questions Changing monitor"
}
|
||||
|
||||
# Evaluation window of the Questions Changing monitor; interpolated into the
# query as "avg(<timeframe>):".
variable "questions_changing_timeframe" {
  description = "Timeframe for the Questions Changing monitor"
  type        = "string"
  default     = "last_10m"
}
|
||||
|
||||
# Database IDs covered by the Questions Changing monitor. The monitor resource
# uses length() of this list as its count, so an empty list creates no monitor.
variable "questions_changing_database_ids" {
  description = "List of Cloud SQL database IDs to watch; one Questions Changing monitor is created per entry"
  type        = "list"
  default     = []
}
|
||||
|
||||
# Middle segment of the database_id filter/tag, which the monitor builds as
# "<project_id>:<questions_changing_region>:<database id>".
variable "questions_changing_region" {
  description = "Region of the databases, used to build the project_id:region:database_id filter of the Questions Changing monitor"
  type        = "string"
  default     = ""
}
|
||||
|
||||
# Algorithm name passed (quoted) as the second argument of anomalies() in the
# Questions Changing monitor query.
variable "questions_changing_anomaly_detection_algorithm" {
  type        = "string"
  default     = "robust"
  description = "Anomaly Detection Algorithm used"
}
|
||||
|
||||
# Deviation count passed as the third argument of anomalies() in the
# Questions Changing monitor query.
variable "questions_changing_deviations" {
  type        = "string"
  default     = 4
  description = "Deviations to detect the anomaly"
}
|
||||
|
||||
# Value of the direction='...' argument of anomalies() in the Questions
# Changing monitor query.
variable "questions_changing_direction" {
  type        = "string"
  default     = "both"
  description = "Direction of the anomaly. It can be both, below or above."
}
|
||||
|
||||
# Value of the seasonality='...' argument of anomalies() in the Questions
# Changing monitor query.
variable "questions_changing_seasonality" {
  type        = "string"
  default     = "weekly"
  description = "Seasonality of the algorithm"
}
|
||||
|
||||
# Feeds thresholds.warning of the Questions Changing monitor.
variable "questions_changing_threshold_warning" {
  type        = "string"
  default     = 0.5
  description = "Questions Changing warning threshold"
}
|
||||
|
||||
# Feeds both thresholds.critical and the comparison at the end of the
# Questions Changing monitor query.
variable "questions_changing_threshold_critical" {
  type        = "string"
  default     = 1
  description = "Questions Changing critical threshold"
}
|
||||
|
||||
# Map handed unchanged to the "silenced" attribute of the Questions Changing
# monitor.
variable "questions_changing_silenced" {
  description = "Groups to mute for GCP Cloud SQL Questions Changing monitor"
  type        = "map"
  default     = {}
}
|
||||
|
||||
@ -94,3 +94,107 @@ EOF
|
||||
"engine:mysql",
|
||||
]
|
||||
}
|
||||
|
||||
#
|
||||
# Queries Anomaly
|
||||
#
|
||||
resource "datadog_monitor" "queries_changing_anomaly" {
  # One monitor per entry of var.queries_changing_database_ids (none when the list is empty).
  count = "${length(var.queries_changing_database_ids)}"

  name    = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally on ${var.project_id}:${var.queries_changing_region}:${var.queries_changing_database_ids[count.index]} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
  message = "${coalesce(var.queries_changing_message, var.message)}"

  # NOTE(review): Datadog's API lists anomaly monitors under the "query alert"
  # type; confirm "metric alert" does not produce a permanent plan diff.
  type = "metric alert"

  # The anomalies() result is compared with ">=": it is the share of anomalous
  # points in the window, so with the default critical threshold of 1 a strict
  # ">" comparison could never be satisfied.
  query = <<EOF
  avg(${var.queries_changing_timeframe}):
    anomalies(
      default(
        avg:gcp.cloudsql.database.mysql.queries{project_id:${var.project_id},database_id:${var.project_id}:${var.queries_changing_region}:${var.queries_changing_database_ids[count.index]}},
      0),
      '${var.queries_changing_anomaly_detection_algorithm}',
      ${var.queries_changing_deviations},
      direction='${var.queries_changing_direction}',
      seasonality='${var.queries_changing_seasonality}'
    )
  >= ${var.queries_changing_threshold_critical}
EOF

  thresholds {
    warning  = "${var.queries_changing_threshold_warning}"
    critical = "${var.queries_changing_threshold_critical}"
  }

  # include_tags was previously assigned twice with the same value; set once.
  include_tags        = true
  notify_no_data      = true
  require_full_window = false
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  locked              = false
  evaluation_delay    = "${var.delay}"
  new_host_delay      = "${var.delay}"
  silenced            = "${var.queries_changing_silenced}"

  tags = [
    "team:gcp",
    "provider:gcp",
    "env:${var.environment}",
    "resource:cloud-sql",
    "engine:mysql",
    # Same project_id:region:database_id value as the query filter (the stray
    # trailing "}" that used to end this tag is removed).
    "database_id:${var.project_id}:${var.queries_changing_region}:${var.queries_changing_database_ids[count.index]}",
  ]
}
|
||||
|
||||
#
|
||||
# Questions Anomaly
|
||||
#
|
||||
resource "datadog_monitor" "questions_changing_anomaly" {
  # One monitor per entry of var.questions_changing_database_ids (none when the list is empty).
  count = "${length(var.questions_changing_database_ids)}"

  name    = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally on ${var.project_id}:${var.questions_changing_region}:${var.questions_changing_database_ids[count.index]} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
  message = "${coalesce(var.questions_changing_message, var.message)}"

  # NOTE(review): Datadog's API lists anomaly monitors under the "query alert"
  # type; confirm "metric alert" does not produce a permanent plan diff.
  type = "metric alert"

  # The anomalies() result is compared with ">=": it is the share of anomalous
  # points in the window, so with the default critical threshold of 1 a strict
  # ">" comparison could never be satisfied.
  query = <<EOF
  avg(${var.questions_changing_timeframe}):
    anomalies(
      default(
        avg:gcp.cloudsql.database.mysql.questions{project_id:${var.project_id},database_id:${var.project_id}:${var.questions_changing_region}:${var.questions_changing_database_ids[count.index]}},
      0),
      '${var.questions_changing_anomaly_detection_algorithm}',
      ${var.questions_changing_deviations},
      direction='${var.questions_changing_direction}',
      seasonality='${var.questions_changing_seasonality}'
    )
  >= ${var.questions_changing_threshold_critical}
EOF

  thresholds {
    warning  = "${var.questions_changing_threshold_warning}"
    critical = "${var.questions_changing_threshold_critical}"
  }

  # include_tags was previously assigned twice with the same value; set once.
  include_tags        = true
  notify_no_data      = true
  require_full_window = false
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  locked              = false
  evaluation_delay    = "${var.delay}"
  new_host_delay      = "${var.delay}"
  silenced            = "${var.questions_changing_silenced}"

  tags = [
    "team:gcp",
    "provider:gcp",
    "env:${var.environment}",
    "resource:cloud-sql",
    "engine:mysql",
    # Same project_id:region:database_id value as the query filter.
    "database_id:${var.project_id}:${var.questions_changing_region}:${var.questions_changing_database_ids[count.index]}",
  ]
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user