diff options
author | beorn7 <beorn@grafana.com> | 2019-08-14 22:24:24 +0200 |
---|---|---|
committer | beorn7 <beorn@grafana.com> | 2019-08-14 22:24:24 +0200 |
commit | 97ef11376219a1e3ee2c5f21f105bdeb26ef43d0 (patch) | |
tree | 470eba513f8c6c837ea932f8701d79aca54b0643 /docs | |
parent | 697c2deed59b414e73197e537192ef320533ceb3 (diff) | |
download | prometheus_node_collector-97ef11376219a1e3ee2c5f21f105bdeb26ef43d0.tar.bz2 prometheus_node_collector-97ef11376219a1e3ee2c5f21f105bdeb26ef43d0.tar.xz prometheus_node_collector-97ef11376219a1e3ee2c5f21f105bdeb26ef43d0.zip |
Make the severity of "critical" alerts configurable
This addresses the blissful scenario where single-node failures are
unproblematic. No reason to wake somebody up if a node is about to
screw itself up by filling the disk.
Signed-off-by: beorn7 <beorn@grafana.com>
Diffstat (limited to 'docs')
-rw-r--r-- | docs/node-mixin/alerts/alerts.libsonnet | 8 | ||||
-rw-r--r-- | docs/node-mixin/config.libsonnet | 13 |
2 files changed, 17 insertions, 4 deletions
diff --git a/docs/node-mixin/alerts/alerts.libsonnet b/docs/node-mixin/alerts/alerts.libsonnet index 7b9fb89..4423f89 100644 --- a/docs/node-mixin/alerts/alerts.libsonnet +++ b/docs/node-mixin/alerts/alerts.libsonnet | |||
@@ -37,7 +37,7 @@ | |||
37 | ||| % $._config, | 37 | ||| % $._config, |
38 | 'for': '1h', | 38 | 'for': '1h', |
39 | labels: { | 39 | labels: { |
40 | severity: 'critical', | 40 | severity: '%(nodeCriticalSeverity)s' % $._config, |
41 | }, | 41 | }, |
42 | annotations: { | 42 | annotations: { |
43 | summary: 'Filesystem is predicted to run out of space within the next 4 hours.', | 43 | summary: 'Filesystem is predicted to run out of space within the next 4 hours.', |
@@ -73,7 +73,7 @@ | |||
73 | ||| % $._config, | 73 | ||| % $._config, |
74 | 'for': '1h', | 74 | 'for': '1h', |
75 | labels: { | 75 | labels: { |
76 | severity: 'critical', | 76 | severity: '%(nodeCriticalSeverity)s' % $._config, |
77 | }, | 77 | }, |
78 | annotations: { | 78 | annotations: { |
79 | summary: 'Filesystem has less than 3% space left.', | 79 | summary: 'Filesystem has less than 3% space left.', |
@@ -113,7 +113,7 @@ | |||
113 | ||| % $._config, | 113 | ||| % $._config, |
114 | 'for': '1h', | 114 | 'for': '1h', |
115 | labels: { | 115 | labels: { |
116 | severity: 'critical', | 116 | severity: '%(nodeCriticalSeverity)s' % $._config, |
117 | }, | 117 | }, |
118 | annotations: { | 118 | annotations: { |
119 | summary: 'Filesystem is predicted to run out of inodes within the next 4 hours.', | 119 | summary: 'Filesystem is predicted to run out of inodes within the next 4 hours.', |
@@ -149,7 +149,7 @@ | |||
149 | ||| % $._config, | 149 | ||| % $._config, |
150 | 'for': '1h', | 150 | 'for': '1h', |
151 | labels: { | 151 | labels: { |
152 | severity: 'critical', | 152 | severity: '%(nodeCriticalSeverity)s' % $._config, |
153 | }, | 153 | }, |
154 | annotations: { | 154 | annotations: { |
155 | summary: 'Filesystem has less than 3% inodes left.', | 155 | summary: 'Filesystem has less than 3% inodes left.', |
diff --git a/docs/node-mixin/config.libsonnet b/docs/node-mixin/config.libsonnet index 95070ca..8cf9860 100644 --- a/docs/node-mixin/config.libsonnet +++ b/docs/node-mixin/config.libsonnet | |||
@@ -17,6 +17,19 @@ | |||
17 | // them here, e.g. 'device!="tmpfs"'. | 17 | // them here, e.g. 'device!="tmpfs"'. |
18 | diskDeviceSelector: '', | 18 | diskDeviceSelector: '', |
19 | 19 | ||
20 | // Some of the alerts are meant to fire if a critical failure of a | ||
21 | // node is imminent (e.g. the disk is about to fill up). In a | ||
22 | // true “cloud native” setup, failures of a single node should be | ||
23 | // tolerated. Hence, even imminent failure of a single node is no | ||
24 | // reason to create a paging alert. However, in practice there are | ||
25 | // still many situations where operators like to get paged in time | ||
26 | // before a node runs out of disk space. nodeCriticalSeverity can | ||
27 | // be set to the desired severity for this kind of alert. This | ||
28 | // can even be templated to depend on labels of the node, e.g. you | ||
29 | // could make this critical for traditional database masters but | ||
30 | // just a warning for K8s nodes. | ||
31 | nodeCriticalSeverity: 'critical', | ||
32 | |||
20 | grafana_prefix: '', | 33 | grafana_prefix: '', |
21 | }, | 34 | }, |
22 | } | 35 | } |