about | summary | refs | log | tree | commit | diff
path: root/docs
diff options
context:
space:
mode:
Diffstat (limited to 'docs')
-rw-r--r-- docs/node-mixin/config.libsonnet 7
-rw-r--r-- docs/node-mixin/dashboards/use.libsonnet 48
2 files changed, 20 insertions, 35 deletions
diff --git a/docs/node-mixin/config.libsonnet b/docs/node-mixin/config.libsonnet
index b25c393..fdea71d 100644
--- a/docs/node-mixin/config.libsonnet
+++ b/docs/node-mixin/config.libsonnet
@@ -2,7 +2,12 @@
2 _config+:: { 2 _config+:: {
3 // Selectors are inserted between {} in Prometheus queries. 3 // Selectors are inserted between {} in Prometheus queries.
4 4
5 // Select the metrics coming from the node exporter. 5 // Select the metrics coming from the node exporter. Note that all
6 // the selected metrics are shown stacked on top of each other in
7 // the 'USE Method / Cluster' dashboard. Consider disabling that
8 // dashboard if mixing up all those metrics in the same dashboard
9 // doesn't make sense (e.g. because they are coming from different
10 // clusters).
6 nodeExporterSelector: 'job="node"', 11 nodeExporterSelector: 'job="node"',
7 12
8 // Select the fstype for filesystem-related queries. If left 13 // Select the fstype for filesystem-related queries. If left
diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet
index d2a568f..3d2e16d 100644
--- a/docs/node-mixin/dashboards/use.libsonnet
+++ b/docs/node-mixin/dashboards/use.libsonnet
@@ -15,9 +15,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
15 instance:node_cpu_utilisation:rate1m{%(nodeExporterSelector)s} 15 instance:node_cpu_utilisation:rate1m{%(nodeExporterSelector)s}
16 * 16 *
17 instance:node_num_cpu:sum{%(nodeExporterSelector)s} 17 instance:node_num_cpu:sum{%(nodeExporterSelector)s}
18 / ignoring (instance) group_left
19 sum without (instance) (instance:node_num_cpu:sum{%(nodeExporterSelector)s})
20 ) 18 )
19 / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s}))
21 ||| % $._config, '{{instance}}', legendLink) + 20 ||| % $._config, '{{instance}}', legendLink) +
22 g.stack + 21 g.stack +
23 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, 22 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@@ -27,11 +26,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
27 // average relates to the "CPU saturation" in the title. 26 // average relates to the "CPU saturation" in the title.
28 g.panel('CPU Saturation (load1 per CPU)') + 27 g.panel('CPU Saturation (load1 per CPU)') +
29 g.queryPanel(||| 28 g.queryPanel(|||
30 ( 29 instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}
31 instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s} 30 / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}))
32 / ignoring (instance) group_left
33 count without (instance) (instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s})
34 )
35 ||| % $._config, '{{instance}}', legendLink) + 31 ||| % $._config, '{{instance}}', legendLink) +
36 g.stack + 32 g.stack +
37 // TODO: Does `max: 1` make sense? The stack can go over 1 in high-load scenarios. 33 // TODO: Does `max: 1` make sense? The stack can go over 1 in high-load scenarios.
@@ -43,11 +39,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
43 .addPanel( 39 .addPanel(
44 g.panel('Memory Utilisation') + 40 g.panel('Memory Utilisation') +
45 g.queryPanel(||| 41 g.queryPanel(|||
46 ( 42 instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}
47 instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s} 43 / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}))
48 / ignoring (instance) group_left
49 count without (instance) (instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s})
50 )
51 ||| % $._config, '{{instance}}', legendLink) + 44 ||| % $._config, '{{instance}}', legendLink) +
52 g.stack + 45 g.stack +
53 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, 46 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@@ -123,11 +116,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
123 // TODO: Does the partition by device make sense? Using the most utilized device per 116 // TODO: Does the partition by device make sense? Using the most utilized device per
124 // instance might make more sense. 117 // instance might make more sense.
125 g.queryPanel(||| 118 g.queryPanel(|||
126 ( 119 instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s}
127 instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s} 120 / scalar(count(instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s}))
128 / ignoring (instance, device) group_left
129 count without (instance, device) (instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s})
130 )
131 ||| % $._config, '{{instance}} {{device}}', legendLink) + 121 ||| % $._config, '{{instance}} {{device}}', legendLink) +
132 g.stack + 122 g.stack +
133 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, 123 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@@ -135,11 +125,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
135 .addPanel( 125 .addPanel(
136 g.panel('Disk IO Saturation') + 126 g.panel('Disk IO Saturation') +
137 g.queryPanel(||| 127 g.queryPanel(|||
138 ( 128 instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s}
139 instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s} 129 / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s}))
140 / ignoring (instance, device) group_left
141 count without (instance, device) (instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s})
142 )
143 ||| % $._config, '{{instance}} {{device}}', legendLink) + 130 ||| % $._config, '{{instance}} {{device}}', legendLink) +
144 g.stack + 131 g.stack +
145 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, 132 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@@ -150,19 +137,12 @@ local g = import 'grafana-builder/grafana.libsonnet';
150 .addPanel( 137 .addPanel(
151 g.panel('Disk Space Utilisation') + 138 g.panel('Disk Space Utilisation') +
152 g.queryPanel(||| 139 g.queryPanel(|||
153 ( 140 sum without (device) (
154 sum without (device) ( 141 max without (fstype, mountpoint) (
155 max without (fstype, mountpoint) ( 142 node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s}
156 node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s}
157 )
158 )
159 / ignoring (instance) group_left
160 sum without (instance, device) (
161 max without (fstype, mountpoint) (
162 node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s}
163 )
164 ) 143 )
165 ) 144 )
145 / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s})))
166 ||| % $._config, '{{instance}}', legendLink) + 146 ||| % $._config, '{{instance}}', legendLink) +
167 g.stack + 147 g.stack +
168 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, 148 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },