mirror of
https://github.com/prometheus/prometheus.git
synced 2025-07-03 11:03:25 +00:00
add alert for sd refresh failure (#12410)
* add alert for sd refresh failure Due to a config error or the SD service being down, Prometheus may fail to refresh SD resources, which may lead to scrape failures or irrelevant metrics. Signed-off-by: Leo Q <LeoQuote@users.noreply.github.com> * apply suggestions Signed-off-by: Leo Q <LeoQuote@users.noreply.github.com> --------- Signed-off-by: Leo Q <LeoQuote@users.noreply.github.com>
This commit is contained in:
parent
ac8abdaacd
commit
4268feb9d7
1 changed file with 14 additions and 0 deletions
|
@@ -20,6 +20,20 @@
|
|||
description: 'Prometheus %(prometheusName)s has failed to reload its configuration.' % $._config,
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusSDRefreshFailure',
|
||||
expr: |||
|
||||
increase(prometheus_sd_refresh_failures_total{%(prometheusSelector)s}[10m]) > 0
|
||||
||| % $._config,
|
||||
'for': '20m',
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
summary: 'Failed Prometheus SD refresh.',
|
||||
description: 'Prometheus %(prometheusName)s has failed to refresh SD with mechanism {{$labels.mechanism}}.' % $._config,
|
||||
},
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusNotificationQueueRunningFull',
|
||||
expr: |||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue