Merged in MON-73-azure-managed-services-monitors-base-feature (pull request #24)
MON-73 Azure managed services monitors base feature Approved-by: Laurent Piroelle <laurent.piroelle@fr.clara.net> Approved-by: Quentin Manfroi <quentin.manfroi@yahoo.fr> Approved-by: Jérôme Respaut <shr3ps@gmail.com> Approved-by: Alexandre Gaillet <alexandre.gaillet@fr.clara.net>
This commit is contained in:
commit
025ff04d9e
118
cloud/azure/README.md
Normal file
118
cloud/azure/README.md
Normal file
@ -0,0 +1,118 @@
|
||||
Azure monitors
|
||||
==============
|
||||
|
||||
How to use this module
|
||||
----------------------
|
||||
|
||||
```
|
||||
module "datadog-monitors-azure" {
|
||||
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure?ref={revision}"
|
||||
|
||||
message = "${module.datadog-message-alerting.alerting-message}"
|
||||
environment = "${var.environment}"
|
||||
}
|
||||
```
|
||||
|
||||
Purpose
|
||||
-------
|
||||
Creates a set of Azure DataDog monitors for the following components:
|
||||
|
||||
* Azure App Services monitors
|
||||
* Azure SQL monitors
|
||||
* Azure Redis monitors
|
||||
* Azure Event Hub monitors
|
||||
* Azure Stream Analytics monitors
|
||||
* Azure Storage monitors
|
||||
* Azure IoT Hub monitors
|
||||
* Azure API Management monitors
|
||||
|
||||
Inputs
|
||||
------
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| apimanagement_failed_requests_threshold_critical | Maximum acceptable percent of failed requests | string | `5` | no |
|
||||
| apimanagement_other_requests_threshold_critical | Maximum acceptable percent of other requests | string | `5` | no |
|
||||
| apimanagement_successful_requests_threshold_critical | Minimum acceptable percent of successful requests | string | `90` | no |
|
||||
| apimanagement_unauthorized_requests_threshold_critical | Maximum acceptable percent of unauthorized requests | string | `5` | no |
|
||||
| appservices_http_2xx_requests_threshold_critical | Minimum critical acceptable percent of 2xx requests | string | `90` | no |
|
||||
| appservices_http_2xx_requests_threshold_warning | Minimum warning acceptable percent of 2xx requests | string | `95` | no |
|
||||
| appservices_http_4xx_requests_threshold_critical | Maximum critical acceptable percent of 4xx errors | string | `30` | no |
|
||||
| appservices_http_4xx_requests_threshold_warning | Maximum warning acceptable percent of 4xx errors | string | `15` | no |
|
||||
| appservices_http_5xx_requests_threshold_critical | Maximum critical acceptable percent of 5xx errors | string | `20` | no |
|
||||
| appservices_http_5xx_requests_threshold_warning | Maximum warning acceptable percent of 5xx errors | string | `10` | no |
|
||||
| appservices_memory_usage_threshold_critical | Alerting threshold in MiB | string | `52430000` | no |
|
||||
| appservices_memory_usage_threshold_warning | Warning threshold in MiB | string | `33550000` | no |
|
||||
| appservices_response_time_threshold_critical | Alerting threshold in seconds | string | `0.8` | no |
|
||||
| appservices_response_time_threshold_warning | Warning threshold in seconds | string | `0.4` | no |
|
||||
| delay | Delay in seconds for the metric evaluation | string | `600` | no |
|
||||
| environment | Architecture environment | string | - | yes |
|
||||
| eventhub_errors_rate_thresold_critical | Errors ratio (percentage) to trigger the critical alert | string | `3` | no |
|
||||
| eventhub_errors_rate_thresold_warning | Errors ratio (percentage) to trigger a warning alert | string | `1` | no |
|
||||
| eventhub_failed_requests_rate_thresold_critical | Failed requests ratio (percentage) to trigger the critical alert | string | `3` | no |
|
||||
| eventhub_failed_requests_rate_thresold_warning | Failed requests ratio (percentage) to trigger a warning alert | string | `1` | no |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| iothub_dropped_d2c_telemetry_egress_threshold_critical | D2C Telemetry Dropped limit (critical threshold) | string | `1000` | no |
|
||||
| iothub_dropped_d2c_telemetry_egress_threshold_warning | D2C Telemetry Dropped limit (warning threshold) | string | `500` | no |
|
||||
| iothub_failed_c2d_methods_rate_threshold_critical | C2D Methods Failed rate limit (critical threshold) | string | `10` | no |
|
||||
| iothub_failed_c2d_methods_rate_threshold_warning | C2D Methods Failed rate limit (warning threshold) | string | `0` | no |
|
||||
| iothub_failed_c2d_twin_read_rate_threshold_critical | C2D Twin Read Failed rate limit (critical threshold) | string | `10` | no |
|
||||
| iothub_failed_c2d_twin_read_rate_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `0` | no |
|
||||
| iothub_failed_c2d_twin_update_rate_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `10` | no |
|
||||
| iothub_failed_c2d_twin_update_rate_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `0` | no |
|
||||
| iothub_failed_d2c_twin_read_rate_threshold_critical | D2C Twin Read Failed rate limit (critical threshold) | string | `10` | no |
|
||||
| iothub_failed_d2c_twin_read_rate_threshold_warning | D2C Twin Read Failed rate limit (warning threshold) | string | `0` | no |
|
||||
| iothub_failed_d2c_twin_update_rate_threshold_critical | D2C Twin Update Failed rate limit (critical threshold) | string | `10` | no |
|
||||
| iothub_failed_d2c_twin_update_rate_threshold_warning | D2C Twin Update Failed rate limit (warning threshold) | string | `0` | no |
|
||||
| iothub_failed_jobs_rate_threshold_critical | Jobs Failed rate limit (critical threshold) | string | `10` | no |
|
||||
| iothub_failed_jobs_rate_threshold_warning | Jobs Failed rate limit (warning threshold) | string | `0` | no |
|
||||
| iothub_failed_listjobs_rate_threshold_critical | ListJobs Failed rate limit (critical threshold) | string | `10` | no |
|
||||
| iothub_failed_listjobs_rate_threshold_warning | ListJobs Failed rate limit (warning threshold) | string | `0` | no |
|
||||
| iothub_failed_queryjobs_rate_threshold_critical | QueryJobs Failed rate limit (critical threshold) | string | `10` | no |
|
||||
| iothub_failed_queryjobs_rate_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `0` | no |
|
||||
| iothub_fallback_d2c_telemetry_egress_threshold_critical | D2C Telemetry Fallback limit (critical threshold) | string | `1000` | no |
|
||||
| iothub_fallback_d2c_telemetry_egress_threshold_warning | D2C Telemetry Fallback limit (warning threshold) | string | `500` | no |
|
||||
| iothub_invalid_d2c_telemetry_egress_threshold_critical | D2C Telemetry Invalid limit (critical threshold) | string | `1000` | no |
|
||||
| iothub_invalid_d2c_telemetry_egress_threshold_warning | D2C Telemetry Invalid limit (warning threshold) | string | `500` | no |
|
||||
| iothub_orphaned_d2c_telemetry_egress_threshold_critical | D2C Telemetry Orphaned limit (critical threshold) | string | `1000` | no |
|
||||
| iothub_orphaned_d2c_telemetry_egress_threshold_warning | D2C Telemetry Orphaned limit (warning threshold) | string | `500` | no |
|
||||
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||
| non_taggable_filter_tags | Tags used for filtering for components without tag support | string | `*` | no |
|
||||
| redis_evictedkeys_limit_threshold_critical | Evicted keys limit (critical threshold) | string | `100` | no |
|
||||
| redis_evictedkeys_limit_threshold_warning | Evicted keys limit (warning threshold) | string | `0` | no |
|
||||
| redis_percent_processor_time_threshold_critical | Processor time percent (critical threshold) | string | `80` | no |
|
||||
| redis_percent_processor_time_threshold_warning | Processor time percent (warning threshold) | string | `60` | no |
|
||||
| redis_server_load_rate_threshold_critical | Server CPU load rate (critical threshold) | string | `90` | no |
|
||||
| redis_server_load_rate_threshold_warning | Server CPU load rate (warning threshold) | string | `70` | no |
|
||||
| sqldatabase_cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no |
|
||||
| sqldatabase_cpu_threshold_warning | CPU usage in percent (warning threshold) | string | `80` | no |
|
||||
| sqldatabase_deadlock_threshold_critical | Amount of Deadlocks (critical threshold) | string | `1` | no |
|
||||
| sqldatabase_diskspace_threshold_critical | Disk space used in percent (critical threshold) | string | `90` | no |
|
||||
| sqldatabase_diskspace_threshold_warning | Disk space used in percent (warning threshold) | string | `80` | no |
|
||||
| sqldatabase_dtu_threshold_critical | Amount of DTU used (critical threshold) | string | `90` | no |
|
||||
| sqldatabase_dtu_threshold_warning | Amount of DTU used (warning threshold) | string | `85` | no |
|
||||
| storage_authorization_error_requests_threshold_critical | Maximum acceptable percent of authorization error requests for a storage | string | `15` | no |
|
||||
| storage_availability_threshold_critical | Minimum acceptable percent of availability for a storage | string | `90` | no |
|
||||
| storage_client_other_error_requests_threshold_critical | Maximum acceptable percent of client other error requests for a storage | string | `15` | no |
|
||||
| storage_latency_threshold_critical | Maximum acceptable end to end latency (ms) for a storage | string | `1000` | no |
|
||||
| storage_network_error_requests_threshold_critical | Maximum acceptable percent of network error requests for a storage | string | `5` | no |
|
||||
| storage_server_other_error_requests_threshold_critical | Maximum acceptable percent of server other error requests for a storage | string | `10` | no |
|
||||
| storage_successful_requests_threshold_critical | Minimum acceptable percent of successful requests for a storage | string | `90` | no |
|
||||
| storage_throttling_error_requests_threshold_critical | Maximum acceptable percent of throttling error requests for a storage | string | `10` | no |
|
||||
| storage_timeout_error_requests_threshold_critical | Maximum acceptable percent of timeout error requests for a storage | string | `5` | no |
|
||||
| streamanalytics_conversion_errors_threshold_critical | Conversion errors limit (critical threshold) | string | `10` | no |
|
||||
| streamanalytics_conversion_errors_threshold_warning | Conversion errors limit (warning threshold) | string | `0` | no |
|
||||
| streamanalytics_failed_function_requests_threshold_critical | Failed Function Request rate limit (critical threshold) | string | `10` | no |
|
||||
| streamanalytics_function_requests_threshold_warning | Failed Function Request rate limit (warning threshold) | string | `0` | no |
|
||||
| streamanalytics_runtime_errors_threshold_critical | Runtime errors limit (critical threshold) | string | `10` | no |
|
||||
| streamanalytics_runtime_errors_threshold_warning | Runtime errors limit (warning threshold) | string | `0` | no |
|
||||
| streamanalytics_su_utilization_threshold_critical | Streaming Unit utilization rate limit (critical threshold) | string | `80` | no |
|
||||
| streamanalytics_su_utilization_threshold_warning | Streaming Unit utilization rate limit (warning threshold) | string | `60` | no |
|
||||
|
||||
Related documentation
|
||||
---------------------
|
||||
|
||||
DataDog documentation: [https://docs.datadoghq.com/integrations/azure/](https://docs.datadoghq.com/integrations/azure/)
|
||||
|
||||
Azure metrics documentation: [https://docs.microsoft.com/en-us/azure/monitoring-and-diagnostics/monitoring-overview-metrics](https://docs.microsoft.com/en-us/azure/monitoring-and-diagnostics/monitoring-overview-metrics)
|
||||
397
cloud/azure/inputs.tf
Normal file
397
cloud/azure/inputs.tf
Normal file
@ -0,0 +1,397 @@
|
||||
variable "environment" {
|
||||
description = "Architecture environment"
|
||||
type = "string"
|
||||
}
|
||||
|
||||
variable "message" {
|
||||
description = "Message sent when a monitor is triggered"
|
||||
type = "string"
|
||||
}
|
||||
|
||||
variable "delay" {
|
||||
description = "Delay in seconds for the metric evaluation"
|
||||
default = 600
|
||||
}
|
||||
|
||||
variable "filter_tags_use_defaults" {
|
||||
description = "Use default filter tags convention"
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "filter_tags_custom" {
|
||||
description = "Tags used for custom filtering when filter_tags_use_defaults is false"
|
||||
default = "*"
|
||||
}
|
||||
|
||||
variable "non_taggable_filter_tags" {
|
||||
description = "Tags used for filtering for components without tag support"
|
||||
default = "*"
|
||||
}
|
||||
|
||||
# Azure API Management specific variables
|
||||
variable "apimanagement_failed_requests_threshold_critical" {
|
||||
description = "Maximum acceptable percent of failed requests"
|
||||
default = 5
|
||||
}
|
||||
|
||||
variable "apimanagement_other_requests_threshold_critical" {
|
||||
description = "Maximum acceptable percent of other requests"
|
||||
default = 5
|
||||
}
|
||||
|
||||
variable "apimanagement_unauthorized_requests_threshold_critical" {
|
||||
description = "Maximum acceptable percent of unauthorized requests"
|
||||
default = 5
|
||||
}
|
||||
|
||||
variable "apimanagement_successful_requests_threshold_critical" {
|
||||
description = "Minimum acceptable percent of successful requests"
|
||||
default = 90
|
||||
}
|
||||
|
||||
# Azure App Services specific variables
|
||||
variable "appservices_response_time_threshold_critical" {
|
||||
default = 0.8
|
||||
description = "Alerting threshold in seconds"
|
||||
}
|
||||
|
||||
variable "appservices_response_time_threshold_warning" {
|
||||
default = 0.4
|
||||
description = "Warning threshold in seconds"
|
||||
}
|
||||
|
||||
variable "appservices_memory_usage_threshold_critical" {
|
||||
default = 52430000
|
||||
description = "Alerting threshold in Mib"
|
||||
}
|
||||
|
||||
variable "appservices_memory_usage_threshold_warning" {
|
||||
default = 33550000
|
||||
description = "Warning threshold in MiB"
|
||||
}
|
||||
|
||||
variable "appservices_http_4xx_requests_threshold_critical" {
|
||||
default = 30
|
||||
description = "Maximum critical acceptable percent of 4xx errors"
|
||||
}
|
||||
|
||||
variable "appservices_http_4xx_requests_threshold_warning" {
|
||||
default = 15
|
||||
description = "Maximum warning acceptable percent of 4xx errors"
|
||||
}
|
||||
|
||||
variable "appservices_http_5xx_requests_threshold_critical" {
|
||||
default = 20
|
||||
description = "Maximum critical acceptable percent of 5xx errors"
|
||||
}
|
||||
|
||||
variable "appservices_http_5xx_requests_threshold_warning" {
|
||||
default = 10
|
||||
description = "Maximum warning acceptable percent of 5xx errors"
|
||||
}
|
||||
|
||||
variable "appservices_http_2xx_requests_threshold_critical" {
|
||||
default = 90
|
||||
description = "Minimum critical acceptable percent of 2xx requests"
|
||||
}
|
||||
|
||||
variable "appservices_http_2xx_requests_threshold_warning" {
|
||||
default = 95
|
||||
description = "Minimum warning acceptable percent of 2xx requests"
|
||||
}
|
||||
|
||||
# Azure Event Hub specific variables
|
||||
variable "eventhub_failed_requests_rate_thresold_critical" {
|
||||
description = "Failed requests ratio (percentage) to trigger the critical alert"
|
||||
default = 3
|
||||
}
|
||||
|
||||
variable "eventhub_failed_requests_rate_thresold_warning" {
|
||||
description = "Failed requests ratio (percentage) to trigger a warning alert"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "eventhub_errors_rate_thresold_critical" {
|
||||
description = "Errors ratio (percentage) to trigger the critical alert"
|
||||
default = 3
|
||||
}
|
||||
|
||||
variable "eventhub_errors_rate_thresold_warning" {
|
||||
description = "Errors ratio (percentage) to trigger a warning alert"
|
||||
default = 1
|
||||
}
|
||||
|
||||
# IOT Hub specific variables
|
||||
variable "iothub_failed_jobs_rate_threshold_warning" {
|
||||
description = "Jobs Failed rate limit (warning threshold)"
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "iothub_failed_jobs_rate_threshold_critical" {
|
||||
description = "Jobs Failed rate limit (critical threshold)"
|
||||
default = 10
|
||||
}
|
||||
|
||||
variable "iothub_failed_listjobs_rate_threshold_warning" {
|
||||
description = "ListJobs Failed rate limit (warning threshold)"
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "iothub_failed_listjobs_rate_threshold_critical" {
|
||||
description = "ListJobs Failed rate limit (critical threshold)"
|
||||
default = 10
|
||||
}
|
||||
|
||||
variable "iothub_failed_queryjobs_rate_threshold_warning" {
|
||||
description = "QueryJobs Failed rate limit (warning threshold)"
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "iothub_failed_queryjobs_rate_threshold_critical" {
|
||||
description = "QueryJobs Failed rate limit (critical threshold)"
|
||||
default = 10
|
||||
}
|
||||
|
||||
variable "iothub_failed_c2d_methods_rate_threshold_warning" {
|
||||
description = "C2D Methods Failed rate limit (warning threshold)"
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "iothub_failed_c2d_methods_rate_threshold_critical" {
|
||||
description = "C2D Methods Failed rate limit (critical threshold)"
|
||||
default = 10
|
||||
}
|
||||
|
||||
variable "iothub_failed_c2d_twin_read_rate_threshold_warning" {
|
||||
description = "C2D Twin Read Failed rate limit (warning threshold)"
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "iothub_failed_c2d_twin_read_rate_threshold_critical" {
|
||||
description = "C2D Twin Read Failed rate limit (critical threshold)"
|
||||
default = 10
|
||||
}
|
||||
|
||||
variable "iothub_failed_c2d_twin_update_rate_threshold_warning" {
|
||||
description = "C2D Twin Update Failed rate limit (warning threshold)"
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "iothub_failed_c2d_twin_update_rate_threshold_critical" {
|
||||
description = "C2D Twin Update Failed rate limit (critical threshold)"
|
||||
default = 10
|
||||
}
|
||||
|
||||
variable "iothub_failed_d2c_twin_read_rate_threshold_warning" {
|
||||
description = "D2C Twin Read Failed rate limit (warning threshold)"
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "iothub_failed_d2c_twin_read_rate_threshold_critical" {
|
||||
description = "D2C Twin Read Failed rate limit (critical threshold)"
|
||||
default = 10
|
||||
}
|
||||
|
||||
variable "iothub_failed_d2c_twin_update_rate_threshold_warning" {
|
||||
description = "D2C Twin Update Failed rate limit (warning threshold)"
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "iothub_failed_d2c_twin_update_rate_threshold_critical" {
|
||||
description = "D2C Twin Update Failed rate limit (critical threshold)"
|
||||
default = 10
|
||||
}
|
||||
|
||||
variable "iothub_dropped_d2c_telemetry_egress_threshold_warning" {
|
||||
description = "D2C Telemetry Dropped limit (warning threshold)"
|
||||
default = 500
|
||||
}
|
||||
|
||||
variable "iothub_dropped_d2c_telemetry_egress_threshold_critical" {
|
||||
description = "D2C Telemetry Dropped limit (critical threshold)"
|
||||
default = 1000
|
||||
}
|
||||
|
||||
variable "iothub_orphaned_d2c_telemetry_egress_threshold_warning" {
|
||||
description = "D2C Telemetry Orphaned limit (warning threshold)"
|
||||
default = 500
|
||||
}
|
||||
|
||||
variable "iothub_orphaned_d2c_telemetry_egress_threshold_critical" {
|
||||
description = "D2C Telemetry Orphaned limit (critical threshold)"
|
||||
default = 1000
|
||||
}
|
||||
|
||||
variable "iothub_invalid_d2c_telemetry_egress_threshold_warning" {
|
||||
description = "D2C Telemetry Invalid limit (warning threshold)"
|
||||
default = 500
|
||||
}
|
||||
|
||||
variable "iothub_invalid_d2c_telemetry_egress_threshold_critical" {
|
||||
description = "D2C Telemetry Invalid limit (critical threshold)"
|
||||
default = 1000
|
||||
}
|
||||
|
||||
variable "iothub_fallback_d2c_telemetry_egress_threshold_warning" {
|
||||
description = "D2C Telemetry Fallback limit (warning threshold)"
|
||||
default = 500
|
||||
}
|
||||
|
||||
variable "iothub_fallback_d2c_telemetry_egress_threshold_critical" {
|
||||
description = "D2C Telemetry Fallback limit (critical threshold)"
|
||||
default = 1000
|
||||
}
|
||||
|
||||
# Azure Redis specific variables
|
||||
variable "redis_evictedkeys_limit_threshold_warning" {
|
||||
description = "Evicted keys limit (warning threshold)"
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "redis_evictedkeys_limit_threshold_critical" {
|
||||
description = "Evicted keys limit (critical threshold)"
|
||||
default = 100
|
||||
}
|
||||
|
||||
variable "redis_percent_processor_time_threshold_critical" {
|
||||
description = "Processor time percent (critical threshold)"
|
||||
default = 80
|
||||
}
|
||||
|
||||
variable "redis_percent_processor_time_threshold_warning" {
|
||||
description = "Processor time percent (warning threshold)"
|
||||
default = 60
|
||||
}
|
||||
|
||||
variable "redis_server_load_rate_threshold_critical" {
|
||||
description = "Server CPU load rate (critical threshold)"
|
||||
default = 90
|
||||
}
|
||||
|
||||
variable "redis_server_load_rate_threshold_warning" {
|
||||
description = "Server CPU load rate (warning threshold)"
|
||||
default = 70
|
||||
}
|
||||
|
||||
# Azure SQL Database specific variables
|
||||
variable "sqldatabase_cpu_threshold_warning" {
|
||||
description = "CPU usage in percent (warning threshold)"
|
||||
default = "80"
|
||||
}
|
||||
|
||||
variable "sqldatabase_cpu_threshold_critical" {
|
||||
description = "CPU usage in percent (critical threshold)"
|
||||
default = "90"
|
||||
}
|
||||
|
||||
variable "sqldatabase_diskspace_threshold_warning" {
|
||||
description = "Disk space used in percent (warning threshold)"
|
||||
default = "80"
|
||||
}
|
||||
|
||||
variable "sqldatabase_diskspace_threshold_critical" {
|
||||
description = "Disk space used in percent (critical threshold)"
|
||||
default = "90"
|
||||
}
|
||||
|
||||
variable "sqldatabase_dtu_threshold_warning" {
|
||||
description = "Amount of DTU used (warning threshold)"
|
||||
default = "85"
|
||||
}
|
||||
|
||||
variable "sqldatabase_dtu_threshold_critical" {
|
||||
description = "Amount of DTU used (critical threshold)"
|
||||
default = "90"
|
||||
}
|
||||
|
||||
variable "sqldatabase_deadlock_threshold_critical" {
|
||||
description = "Amount of Deadlocks (critical threshold)"
|
||||
default = "1"
|
||||
}
|
||||
|
||||
# Azure Storage specific variables
|
||||
variable "storage_availability_threshold_critical" {
|
||||
description = "Minimum acceptable percent of availability for a storage"
|
||||
default = 90
|
||||
}
|
||||
|
||||
variable "storage_successful_requests_threshold_critical" {
|
||||
description = "Minimum acceptable percent of successful requests for a storage"
|
||||
default = 90
|
||||
}
|
||||
|
||||
variable "storage_latency_threshold_critical" {
|
||||
description = "Maximum acceptable end to end latency (ms) for a storage"
|
||||
default = 1000
|
||||
}
|
||||
|
||||
variable "storage_timeout_error_requests_threshold_critical" {
|
||||
description = "Maximum acceptable percent of timeout error requests for a storage"
|
||||
default = 5
|
||||
}
|
||||
|
||||
variable "storage_network_error_requests_threshold_critical" {
|
||||
description = "Maximum acceptable percent of network error requests for a storage"
|
||||
default = 5
|
||||
}
|
||||
|
||||
variable "storage_throttling_error_requests_threshold_critical" {
|
||||
description = "Maximum acceptable percent of throttling error requests for a storage"
|
||||
default = 10
|
||||
}
|
||||
|
||||
variable "storage_server_other_error_requests_threshold_critical" {
|
||||
description = "Maximum acceptable percent of server other error requests for a storage"
|
||||
default = 10
|
||||
}
|
||||
|
||||
variable "storage_client_other_error_requests_threshold_critical" {
|
||||
description = "Maximum acceptable percent of client other error requests for a storage"
|
||||
default = 15
|
||||
}
|
||||
|
||||
variable "storage_authorization_error_requests_threshold_critical" {
|
||||
description = "Maximum acceptable percent of authorization error requests for a storage"
|
||||
default = 15
|
||||
}
|
||||
|
||||
# Azure Stream Analytics specific variables
|
||||
variable "streamanalytics_su_utilization_threshold_warning" {
|
||||
description = "Streaming Unit utilization rate limit (warning threshold)"
|
||||
default = 60
|
||||
}
|
||||
|
||||
variable "streamanalytics_su_utilization_threshold_critical" {
|
||||
description = "Streaming Unit utilization rate limit (critical threshold)"
|
||||
default = 80
|
||||
}
|
||||
|
||||
variable "streamanalytics_function_requests_threshold_warning" {
|
||||
description = "Failed Function Request rate limit (warning threshold)"
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "streamanalytics_failed_function_requests_threshold_critical" {
|
||||
description = "Failed Function Request rate limit (critical threshold)"
|
||||
default = 10
|
||||
}
|
||||
|
||||
variable "streamanalytics_conversion_errors_threshold_warning" {
|
||||
description = "Conversion errors limit (warning threshold)"
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "streamanalytics_conversion_errors_threshold_critical" {
|
||||
description = "Conversion errors limit (critical threshold)"
|
||||
default = 10
|
||||
}
|
||||
|
||||
variable "streamanalytics_runtime_errors_threshold_warning" {
|
||||
description = "Runtime errors limit (warning threshold)"
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "streamanalytics_runtime_errors_threshold_critical" {
|
||||
description = "Runtime errors limit (critical threshold)"
|
||||
default = 10
|
||||
}
|
||||
@ -60,8 +60,7 @@ Inputs
|
||||
| failed_queryjobs_rate_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `0` | no |
|
||||
| fallback_d2c_telemetry_egress_threshold_critical | D2C Telemetry Fallback limit (critical threshold) | string | `1000` | no |
|
||||
| fallback_d2c_telemetry_egress_threshold_warning | D2C Telemetry Fallback limit (warning threshold) | string | `500` | no |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| filter_tags | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| invalid_d2c_telemetry_egress_threshold_critical | D2C Telemetry Invalid limit (critical threshold) | string | `1000` | no |
|
||||
| invalid_d2c_telemetry_egress_threshold_warning | D2C Telemetry Invalid limit (warning threshold) | string | `500` | no |
|
||||
| message | Message sent when an alert is triggered | string | - | yes |
|
||||
|
||||
@ -14,14 +14,9 @@ variable "message" {
|
||||
description = "Message sent when an alert is triggered"
|
||||
}
|
||||
|
||||
variable "filter_tags_use_defaults" {
|
||||
description = "Use default filter tags convention"
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "filter_tags_custom" {
|
||||
description = "Tags used for custom filtering when filter_tags_use_defaults is false"
|
||||
default = "*"
|
||||
variable "filter_tags" {
|
||||
description = "Tags used for filtering"
|
||||
default = "*"
|
||||
}
|
||||
|
||||
# Azure IOT hubs specific
|
||||
|
||||
@ -1,20 +1,12 @@
|
||||
data "template_file" "filter" {
|
||||
template = "$${filter}"
|
||||
|
||||
vars {
|
||||
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_iothub:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
|
||||
}
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "too_many_jobs_failed" {
|
||||
name = "[${var.environment}] IOT Hub Too many jobs failed on {{name}}"
|
||||
message = "${var.message}"
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m):(
|
||||
avg:azure.devices_iothubs.jobs.failed{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.jobs.failed{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.devices_iothubs.jobs.completed{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() )
|
||||
avg:azure.devices_iothubs.jobs.failed{${var.filter_tags}} by {resource_group,region,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.jobs.failed{${var.filter_tags}} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.devices_iothubs.jobs.completed{${var.filter_tags}} by {resource_group,region,name}.as_count() )
|
||||
) * 100 > ${var.failed_jobs_rate_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -45,9 +37,9 @@ resource "datadog_monitor" "too_many_list_jobs_failed" {
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m):(
|
||||
avg:azure.devices_iothubs.jobs.list_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.jobs.list_jobs.success{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() +
|
||||
avg:azure.devices_iothubs.jobs.list_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() )
|
||||
avg:azure.devices_iothubs.jobs.list_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.jobs.list_jobs.success{${var.filter_tags}} by {resource_group,name}.as_count() +
|
||||
avg:azure.devices_iothubs.jobs.list_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_count() )
|
||||
) * 100 > ${var.failed_listjobs_rate_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -78,9 +70,9 @@ resource "datadog_monitor" "too_many_query_jobs_failed" {
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m):(
|
||||
avg:azure.devices_iothubs.jobs.query_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.jobs.query_jobs.success{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() +
|
||||
avg:azure.devices_iothubs.jobs.query_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() )
|
||||
avg:azure.devices_iothubs.jobs.query_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.jobs.query_jobs.success{${var.filter_tags}} by {resource_group,name}.as_count() +
|
||||
avg:azure.devices_iothubs.jobs.query_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_count() )
|
||||
) * 100 > ${var.failed_queryjobs_rate_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -110,7 +102,7 @@ resource "datadog_monitor" "status" {
|
||||
message = "${var.message}"
|
||||
|
||||
query = <<EOF
|
||||
avg(last_5m):avg:azure.devices_iothubs.status{${data.template_file.filter.rendered}} by {resource_group,region,name} < 1
|
||||
avg(last_5m):avg:azure.devices_iothubs.status{${var.filter_tags}} by {resource_group,region,name} < 1
|
||||
EOF
|
||||
|
||||
type = "metric alert"
|
||||
@ -134,7 +126,7 @@ resource "datadog_monitor" "total_devices" {
|
||||
message = "${var.message}"
|
||||
|
||||
query = <<EOF
|
||||
avg(last_5m):avg:azure.devices_iothubs.devices.total_devices{${data.template_file.filter.rendered}} by {resource_group,region,name} == 0
|
||||
avg(last_5m):avg:azure.devices_iothubs.devices.total_devices{${var.filter_tags}} by {resource_group,region,name} == 0
|
||||
EOF
|
||||
|
||||
type = "metric alert"
|
||||
@ -159,9 +151,9 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" {
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m):(
|
||||
avg:azure.devices_iothubs.c2d.methods.failure{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.c2d.methods.failure{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.devices_iothubs.c2d.methods.success{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() )
|
||||
avg:azure.devices_iothubs.c2d.methods.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.c2d.methods.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.devices_iothubs.c2d.methods.success{${var.filter_tags}} by {resource_group,region,name}.as_count() )
|
||||
) * 100 > ${var.failed_c2d_methods_rate_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -192,9 +184,9 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m):(
|
||||
avg:azure.devices_iothubs.c2d.twin.read.failure{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.c2d.twin.read.failure{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.devices_iothubs.c2d.twin.read.success{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() )
|
||||
avg:azure.devices_iothubs.c2d.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.c2d.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.devices_iothubs.c2d.twin.read.success{${var.filter_tags}} by {resource_group,region,name}.as_count() )
|
||||
) * 100 > ${var.failed_c2d_twin_read_rate_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -225,9 +217,9 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m):(
|
||||
avg:azure.devices_iothubs.c2d.twin.update.failure{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.c2d.twin.update.failure{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.devices_iothubs.c2d.twin.update.success{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() )
|
||||
avg:azure.devices_iothubs.c2d.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.c2d.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.devices_iothubs.c2d.twin.update.success{${var.filter_tags}} by {resource_group,region,name}.as_count() )
|
||||
) * 100 > ${var.failed_c2d_twin_update_rate_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -258,9 +250,9 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m):(
|
||||
avg:azure.devices_iothubs.d2c.twin.read.failure{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.d2c.twin.read.failure{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.devices_iothubs.d2c.twin.read.success{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() )
|
||||
avg:azure.devices_iothubs.d2c.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.d2c.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.devices_iothubs.d2c.twin.read.success{${var.filter_tags}} by {resource_group,region,name}.as_count() )
|
||||
) * 100 > ${var.failed_d2c_twin_read_rate_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -291,9 +283,9 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m):(
|
||||
avg:azure.devices_iothubs.d2c.twin.update.failure{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.d2c.twin.update.failure{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.devices_iothubs.d2c.twin.update.success{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() )
|
||||
avg:azure.devices_iothubs.d2c.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() /
|
||||
( avg:azure.devices_iothubs.d2c.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.devices_iothubs.d2c.twin.update.success{${var.filter_tags}} by {resource_group,region,name}.as_count() )
|
||||
) * 100 > ${var.failed_d2c_twin_update_rate_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -324,7 +316,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m): (
|
||||
avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
|
||||
avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_count()
|
||||
) > ${var.dropped_d2c_telemetry_egress_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -355,7 +347,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m): (
|
||||
avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
|
||||
avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_count()
|
||||
) > ${var.orphaned_d2c_telemetry_egress_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -386,7 +378,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m): (
|
||||
avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
|
||||
avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_count()
|
||||
) > ${var.invalid_d2c_telemetry_egress_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -417,7 +409,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" {
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m): (
|
||||
avg:azure.devices_iothubs.d2c.telemetry.egress.fallback{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
|
||||
avg:azure.devices_iothubs.d2c.telemetry.egress.fallback{${var.filter_tags}} by {resource_group,region,name}.as_count()
|
||||
) > ${var.fallback_d2c_telemetry_egress_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -448,8 +440,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" {
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m): (
|
||||
avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() -
|
||||
avg:azure.devices_iothubs.d2c.telemetry.ingress.success{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
|
||||
avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol{${var.filter_tags}} by {resource_group,region,name}.as_count() -
|
||||
avg:azure.devices_iothubs.d2c.telemetry.ingress.success{${var.filter_tags}} by {resource_group,region,name}.as_count()
|
||||
) > 0
|
||||
EOF
|
||||
|
||||
|
||||
166
cloud/azure/monitors.tf
Normal file
166
cloud/azure/monitors.tf
Normal file
@ -0,0 +1,166 @@
|
||||
# Azure API Management monitors.
# Instantiates the local ./apimanagement sub-module and forwards the shared
# settings (environment, message, delay, filter-tag options) together with the
# apimanagement_* request-percentage thresholds (critical level only).
module "apimanagement" {
|
||||
source = "./apimanagement"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${var.message}"
|
||||
delay = "${var.delay}"
|
||||
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
|
||||
failed_requests_threshold_critical = "${var.apimanagement_failed_requests_threshold_critical}"
|
||||
other_requests_threshold_critical = "${var.apimanagement_other_requests_threshold_critical}"
|
||||
successful_requests_threshold_critical = "${var.apimanagement_successful_requests_threshold_critical}"
|
||||
unauthorized_requests_threshold_critical = "${var.apimanagement_unauthorized_requests_threshold_critical}"
|
||||
}
|
||||
|
||||
# Azure App Services monitors.
# Instantiates the local ./app-services sub-module and forwards the shared
# settings (environment, message, delay, filter-tag options) together with the
# appservices_* thresholds: HTTP 2xx/4xx/5xx request thresholds, memory usage
# and response time, each with a critical and a warning level.
module "appservices" {
|
||||
source = "./app-services"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${var.message}"
|
||||
delay = "${var.delay}"
|
||||
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
|
||||
http_2xx_requests_threshold_critical = "${var.appservices_http_2xx_requests_threshold_critical}"
|
||||
http_2xx_requests_threshold_warning = "${var.appservices_http_2xx_requests_threshold_warning}"
|
||||
http_5xx_requests_threshold_critical = "${var.appservices_http_5xx_requests_threshold_critical}"
|
||||
http_5xx_requests_threshold_warning = "${var.appservices_http_5xx_requests_threshold_warning}"
|
||||
http_4xx_requests_threshold_critical = "${var.appservices_http_4xx_requests_threshold_critical}"
|
||||
http_4xx_requests_threshold_warning = "${var.appservices_http_4xx_requests_threshold_warning}"
|
||||
memory_usage_threshold_critical = "${var.appservices_memory_usage_threshold_critical}"
|
||||
memory_usage_threshold_warning = "${var.appservices_memory_usage_threshold_warning}"
|
||||
response_time_threshold_critical = "${var.appservices_response_time_threshold_critical}"
|
||||
response_time_threshold_warning = "${var.appservices_response_time_threshold_warning}"
|
||||
}
|
||||
|
||||
# Azure Event Hub monitors.
# Instantiates the local ./eventhub sub-module and forwards the shared settings
# (environment, message, delay, filter-tag options) together with the
# eventhub_* error-rate and failed-request-rate thresholds.
# NOTE(review): "thresold" is spelled this way in both the argument names and
# the referenced variables below; renaming it would need a coordinated change
# in the sub-module and in this module's variable declarations.
module "eventhub" {
|
||||
source = "./eventhub"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${var.message}"
|
||||
delay = "${var.delay}"
|
||||
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
|
||||
errors_rate_thresold_critical = "${var.eventhub_errors_rate_thresold_critical}"
|
||||
errors_rate_thresold_warning = "${var.eventhub_errors_rate_thresold_warning}"
|
||||
failed_requests_rate_thresold_critical = "${var.eventhub_failed_requests_rate_thresold_critical}"
|
||||
failed_requests_rate_thresold_warning = "${var.eventhub_failed_requests_rate_thresold_warning}"
|
||||
}
|
||||
|
||||
# Azure IoT Hub monitors.
# Instantiates the local ./iothubs sub-module and forwards environment, message
# and delay plus the iothub_* thresholds (critical and warning for each).
# Unlike the other modules in this file, filtering is passed as a single
# `filter_tags` argument fed from var.non_taggable_filter_tags rather than the
# filter_tags_use_defaults / filter_tags_custom pair.
module "iothub" {
|
||||
source = "./iothubs"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${var.message}"
|
||||
delay = "${var.delay}"
|
||||
|
||||
filter_tags = "${var.non_taggable_filter_tags}"
|
||||
|
||||
dropped_d2c_telemetry_egress_threshold_critical = "${var.iothub_dropped_d2c_telemetry_egress_threshold_critical}"
|
||||
dropped_d2c_telemetry_egress_threshold_warning = "${var.iothub_dropped_d2c_telemetry_egress_threshold_warning}"
|
||||
failed_c2d_methods_rate_threshold_critical = "${var.iothub_failed_c2d_methods_rate_threshold_critical}"
|
||||
failed_c2d_methods_rate_threshold_warning = "${var.iothub_failed_c2d_methods_rate_threshold_warning}"
|
||||
failed_c2d_twin_read_rate_threshold_critical = "${var.iothub_failed_c2d_twin_read_rate_threshold_critical}"
|
||||
failed_c2d_twin_read_rate_threshold_warning = "${var.iothub_failed_c2d_twin_read_rate_threshold_warning}"
|
||||
failed_c2d_twin_update_rate_threshold_critical = "${var.iothub_failed_c2d_twin_update_rate_threshold_critical}"
|
||||
failed_c2d_twin_update_rate_threshold_warning = "${var.iothub_failed_c2d_twin_update_rate_threshold_warning}"
|
||||
failed_d2c_twin_read_rate_threshold_critical = "${var.iothub_failed_d2c_twin_read_rate_threshold_critical}"
|
||||
failed_d2c_twin_read_rate_threshold_warning = "${var.iothub_failed_d2c_twin_read_rate_threshold_warning}"
|
||||
failed_d2c_twin_update_rate_threshold_critical = "${var.iothub_failed_d2c_twin_update_rate_threshold_critical}"
|
||||
failed_d2c_twin_update_rate_threshold_warning = "${var.iothub_failed_d2c_twin_update_rate_threshold_warning}"
|
||||
failed_jobs_rate_threshold_critical = "${var.iothub_failed_jobs_rate_threshold_critical}"
|
||||
failed_jobs_rate_threshold_warning = "${var.iothub_failed_jobs_rate_threshold_warning}"
|
||||
failed_listjobs_rate_threshold_critical = "${var.iothub_failed_listjobs_rate_threshold_critical}"
|
||||
failed_listjobs_rate_threshold_warning = "${var.iothub_failed_listjobs_rate_threshold_warning}"
|
||||
failed_queryjobs_rate_threshold_critical = "${var.iothub_failed_queryjobs_rate_threshold_critical}"
|
||||
failed_queryjobs_rate_threshold_warning = "${var.iothub_failed_queryjobs_rate_threshold_warning}"
|
||||
fallback_d2c_telemetry_egress_threshold_critical = "${var.iothub_fallback_d2c_telemetry_egress_threshold_critical}"
|
||||
fallback_d2c_telemetry_egress_threshold_warning = "${var.iothub_fallback_d2c_telemetry_egress_threshold_warning}"
|
||||
invalid_d2c_telemetry_egress_threshold_critical = "${var.iothub_invalid_d2c_telemetry_egress_threshold_critical}"
|
||||
invalid_d2c_telemetry_egress_threshold_warning = "${var.iothub_invalid_d2c_telemetry_egress_threshold_warning}"
|
||||
orphaned_d2c_telemetry_egress_threshold_critical = "${var.iothub_orphaned_d2c_telemetry_egress_threshold_critical}"
|
||||
orphaned_d2c_telemetry_egress_threshold_warning = "${var.iothub_orphaned_d2c_telemetry_egress_threshold_warning}"
|
||||
}
|
||||
|
||||
# Azure Redis monitors.
# Instantiates the local ./redis sub-module and forwards the shared settings
# (environment, message, delay, filter-tag options) together with the redis_*
# thresholds: evicted keys, percent processor time and server load rate, each
# with a critical and a warning level.
module "redis" {
|
||||
source = "./redis"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${var.message}"
|
||||
delay = "${var.delay}"
|
||||
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
|
||||
evictedkeys_limit_threshold_critical = "${var.redis_evictedkeys_limit_threshold_critical}"
|
||||
evictedkeys_limit_threshold_warning = "${var.redis_evictedkeys_limit_threshold_warning}"
|
||||
percent_processor_time_threshold_critical = "${var.redis_percent_processor_time_threshold_critical}"
|
||||
percent_processor_time_threshold_warning = "${var.redis_percent_processor_time_threshold_warning}"
|
||||
server_load_rate_threshold_critical = "${var.redis_server_load_rate_threshold_critical}"
|
||||
server_load_rate_threshold_warning = "${var.redis_server_load_rate_threshold_warning}"
|
||||
}
|
||||
|
||||
# Azure SQL Database monitors.
# Instantiates the local ./sql-database sub-module and forwards the shared
# settings (environment, message, delay, filter-tag options) together with the
# sqldatabase_* thresholds. CPU, disk space and DTU get both a critical and a
# warning level; deadlock is forwarded with a critical level only.
module "sqldatabase" {
|
||||
source = "./sql-database"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${var.message}"
|
||||
delay = "${var.delay}"
|
||||
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
|
||||
cpu_threshold_critical = "${var.sqldatabase_cpu_threshold_critical}"
|
||||
cpu_threshold_warning = "${var.sqldatabase_cpu_threshold_warning}"
|
||||
deadlock_threshold_critical = "${var.sqldatabase_deadlock_threshold_critical}"
|
||||
diskspace_threshold_critical = "${var.sqldatabase_diskspace_threshold_critical}"
|
||||
diskspace_threshold_warning = "${var.sqldatabase_diskspace_threshold_warning}"
|
||||
dtu_threshold_critical = "${var.sqldatabase_dtu_threshold_critical}"
|
||||
dtu_threshold_warning = "${var.sqldatabase_dtu_threshold_warning}"
|
||||
}
|
||||
|
||||
# Azure Storage monitors.
# Instantiates the local ./storage sub-module and forwards the shared settings
# (environment, message, delay, filter-tag options) together with the
# storage_* thresholds. Only critical-level thresholds are forwarded here.
module "storage" {
|
||||
source = "./storage"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${var.message}"
|
||||
delay = "${var.delay}"
|
||||
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
|
||||
authorization_error_requests_threshold_critical = "${var.storage_authorization_error_requests_threshold_critical}"
|
||||
availability_threshold_critical = "${var.storage_availability_threshold_critical}"
|
||||
client_other_error_requests_threshold_critical = "${var.storage_client_other_error_requests_threshold_critical}"
|
||||
latency_threshold_critical = "${var.storage_latency_threshold_critical}"
|
||||
network_error_requests_threshold_critical = "${var.storage_network_error_requests_threshold_critical}"
|
||||
server_other_error_requests_threshold_critical = "${var.storage_server_other_error_requests_threshold_critical}"
|
||||
successful_requests_threshold_critical = "${var.storage_successful_requests_threshold_critical}"
|
||||
throttling_error_requests_threshold_critical = "${var.storage_throttling_error_requests_threshold_critical}"
|
||||
timeout_error_requests_threshold_critical = "${var.storage_timeout_error_requests_threshold_critical}"
|
||||
}
|
||||
|
||||
# Azure Stream Analytics monitors.
# Instantiates the local ./stream-analytics sub-module and forwards the shared
# settings (environment, message, delay, filter-tag options) together with the
# streamanalytics_* thresholds: conversion errors, function requests, runtime
# errors and SU utilization.
# NOTE(review): the function-request pair is named asymmetrically
# (failed_function_requests_threshold_critical vs.
# function_requests_threshold_warning) - confirm this matches the sub-module's
# variable names before renaming either side.
module "streamanalytics" {
|
||||
source = "./stream-analytics"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${var.message}"
|
||||
delay = "${var.delay}"
|
||||
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
|
||||
conversion_errors_threshold_critical = "${var.streamanalytics_conversion_errors_threshold_critical}"
|
||||
conversion_errors_threshold_warning = "${var.streamanalytics_conversion_errors_threshold_warning}"
|
||||
failed_function_requests_threshold_critical = "${var.streamanalytics_failed_function_requests_threshold_critical}"
|
||||
function_requests_threshold_warning = "${var.streamanalytics_function_requests_threshold_warning}"
|
||||
runtime_errors_threshold_critical = "${var.streamanalytics_runtime_errors_threshold_critical}"
|
||||
runtime_errors_threshold_warning = "${var.streamanalytics_runtime_errors_threshold_warning}"
|
||||
su_utilization_threshold_critical = "${var.streamanalytics_su_utilization_threshold_critical}"
|
||||
su_utilization_threshold_warning = "${var.streamanalytics_su_utilization_threshold_warning}"
|
||||
}
|
||||
@ -28,7 +28,7 @@ Inputs
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no |
|
||||
| cpu_threshold_warning | CPU usage in percent (warning threshold) | string | `` | no |
|
||||
| cpu_threshold_warning | CPU usage in percent (warning threshold) | string | `80` | no |
|
||||
| deadlock_threshold_critical | Amount of Deadlocks (critical threshold) | string | `1` | no |
|
||||
| delay | Delay in seconds for the metric evaluation | string | `600` | no |
|
||||
| diskspace_threshold_critical | Disk space used in percent (critical threshold) | string | `90` | no |
|
||||
|
||||
@ -28,7 +28,7 @@ variable "filter_tags_custom" {
|
||||
|
||||
variable "cpu_threshold_warning" {
|
||||
description = "CPU usage in percent (warning threshold)"
|
||||
default = ""
|
||||
default = "80"
|
||||
}
|
||||
|
||||
variable "cpu_threshold_critical" {
|
||||
|
||||
@ -32,20 +32,20 @@ Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| authorization_error_requests_threshold_critical | Maximum acceptable percent of authorization error requests for a storage | string | `15` | no |
|
||||
| availability_threshold_critical | Minimum acceptable percent of availability for a storage | string | `90` | no |
|
||||
| client_other_error_requests_threshold_critical | Maximum acceptable percent of client other error requests for a storage | string | `15` | no |
|
||||
| delay | Delay in seconds for the metric evaluation | string | `600` | no |
|
||||
| environment | Architecture environment | string | - | yes |
|
||||
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||
| filter_tags_use_defaults | Use default tagging convention | string | `true` | no |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| availability_threshold_critical | Minimum threshold of availability | string | `90` | no |
|
||||
| successful_requests_threshold_critical | Minimum threshold of successful requests | string | `90` | no |
|
||||
| latency_threshold_critical | Maximum threshold of latency in ms | string | `1000` | no |
|
||||
| timeout_error_requests_threshold_critical | Maximum threshold of timeout error requests in percent | string | `35` | no |
|
||||
| network_error_requests_threshold_critical | Maximum threshold of network error requests in percent | string | `35` | no |
|
||||
| throttling_error_requests_threshold_critical | Maximum threshold of throttling error requests in percent | string | `50` | no |
|
||||
| server_other_error_requests_threshold_critical | Maximum threshold of server other error requests in percent | string | `50` | no |
|
||||
| client_other_error_requests_threshold_critical | Maximum threshold of client other error requests in percent | string | `75` | no |
|
||||
| authorization_error_requests_threshold_critical | Maximum threshold of authorization error requests in percent | string | `75` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| latency_threshold_critical | Maximum acceptable end to end latency (ms) for a storage | string | `1000` | no |
|
||||
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||
| network_error_requests_threshold_critical | Maximum acceptable percent of network error requests for a storage | string | `5` | no |
|
||||
| server_other_error_requests_threshold_critical | Maximum acceptable percent of server other error requests for a storage | string | `10` | no |
|
||||
| successful_requests_threshold_critical | Minimum acceptable percent of successful requests for a storage | string | `90` | no |
|
||||
| throttling_error_requests_threshold_critical | Maximum acceptable percent of throttling error requests for a storage | string | `10` | no |
|
||||
| timeout_error_requests_threshold_critical | Maximum acceptable percent of timeout error requests for a storage | string | `5` | no |
|
||||
|
||||
Related documentation
|
||||
---------------------
|
||||
|
||||
@ -23,17 +23,15 @@ Inputs
|
||||
| conversion_errors_threshold_warning | Conversion errors limit (warning threshold) | string | `0` | no |
|
||||
| delay | Delay in seconds for the metric evaluation | string | `600` | no |
|
||||
| environment | Architecture environment | string | - | yes |
|
||||
| function_requests_threshold_critical | Failed Function Request rate limit (critical threshold) | string | `10` | no |
|
||||
| failed_function_requests_threshold_critical | Failed Function Request rate limit (critical threshold) | string | `10` | no |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| function_requests_threshold_warning | Failed Function Request rate limit (warning threshold) | string | `0` | no |
|
||||
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||
| provider | What is the monitored provider | string | azure | no |
|
||||
| runtime_errors_threshold_critical | | string | `10` | no |
|
||||
| runtime_errors_threshold_warning | | string | `0` | no |
|
||||
| su_utilization_threshold_critical | | string | `80` | no |
|
||||
| su_utilization_threshold_warning | Monitor specific | string | `60` | no |
|
||||
| service | What is the monitored service | string | storage | no |
|
||||
| subscription_id | Azure account id used as filter for monitors | string | - | yes |
|
||||
| use_filter_tags | Filter the data with service tags if true | string | `true` | no |
|
||||
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||
| runtime_errors_threshold_critical | Runtime errors limit (critical threshold) | string | `10` | no |
|
||||
| runtime_errors_threshold_warning | Runtime errors limit (warning threshold) | string | `0` | no |
|
||||
| su_utilization_threshold_critical | Streaming Unit utilization rate limit (critical threshold) | string | `80` | no |
|
||||
| su_utilization_threshold_warning | Streaming Unit utilization rate limit (warning threshold) | string | `60` | no |
|
||||
|
||||
Related documentation
|
||||
---------------------
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user