aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author iuri aranda <iuri.aranda@gmail.com> 2020-03-02 16:24:51 +0100
committer GitHub <noreply@github.com> 2020-03-02 16:24:51 +0100
commit 0107bc794204f50d887898da60032da890637471 (patch)
tree bda809185ca7d9a65b7a8915634c851868c8f0d9
parent a7c31ff7ed0990545ed4cc62690fc53563ee8860 (diff)
downloadprometheus_node_collector-0107bc794204f50d887898da60032da890637471.tar.bz2
prometheus_node_collector-0107bc794204f50d887898da60032da890637471.tar.xz
prometheus_node_collector-0107bc794204f50d887898da60032da890637471.zip
Make FS space alerts thresholds configurable (#1624)
* Make FS space alerts thresholds configurable (#1) This makes it possible to tweak the thresholds for the NodeFilesystemSpaceFillingUp alerts. This might be necessary in systems like Kubernetes, where the image garbage collector runs at 85% disk usage, so it is not a problem for the disk to reach that usage level. Signed-off-by: iuri aranda <iuri@skyscrapers.eu>
-rw-r--r-- docs/node-mixin/alerts/alerts.libsonnet 4
-rw-r--r-- docs/node-mixin/config.libsonnet 12
2 files changed, 14 insertions, 2 deletions
diff --git a/docs/node-mixin/alerts/alerts.libsonnet b/docs/node-mixin/alerts/alerts.libsonnet
index 0cdc16e..b95b1c5 100644
--- a/docs/node-mixin/alerts/alerts.libsonnet
+++ b/docs/node-mixin/alerts/alerts.libsonnet
@@ -8,7 +8,7 @@
8 alert: 'NodeFilesystemSpaceFillingUp', 8 alert: 'NodeFilesystemSpaceFillingUp',
9 expr: ||| 9 expr: |||
10 ( 10 (
11 node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 40 11 node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceFillingUpWarningThreshold)d
12 and 12 and
13 predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 24*60*60) < 0 13 predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 24*60*60) < 0
14 and 14 and
@@ -28,7 +28,7 @@
28 alert: 'NodeFilesystemSpaceFillingUp', 28 alert: 'NodeFilesystemSpaceFillingUp',
29 expr: ||| 29 expr: |||
30 ( 30 (
31 node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 20 31 node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceFillingUpCriticalThreshold)d
32 and 32 and
33 predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 4*60*60) < 0 33 predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 4*60*60) < 0
34 and 34 and
diff --git a/docs/node-mixin/config.libsonnet b/docs/node-mixin/config.libsonnet
index fdea71d..c06252c 100644
--- a/docs/node-mixin/config.libsonnet
+++ b/docs/node-mixin/config.libsonnet
@@ -35,6 +35,18 @@
35 // just a warning for K8s nodes. 35 // just a warning for K8s nodes.
36 nodeCriticalSeverity: 'critical', 36 nodeCriticalSeverity: 'critical',
37 37
38 // Available disk space (%) thresholds on which to trigger the
39 // 'NodeFilesystemSpaceFillingUp' alerts. These alerts fire if the
40 // filesystem is predicted to run out of space within 1d (warning) or 4h
41 // (critical) and available space is already below the matching threshold.
42 // In some cases you'll want to adjust these, e.g. by default Kubernetes
43 // runs the image garbage collection when the disk usage reaches 85%
44 // of its available space. In that case, you'll want to reduce the
45 // critical threshold below to something like 14 or 15, otherwise
46 // the alert could fire under normal node usage.
47 fsSpaceFillingUpWarningThreshold: 40,
48 fsSpaceFillingUpCriticalThreshold: 20,
49
38 grafana_prefix: '', 50 grafana_prefix: '',
39 }, 51 },
40} 52}