aboutsummaryrefslogtreecommitdiff
path: root/docs
diff options
context:
space:
mode:
authorbeorn7 <beorn@grafana.com>2019-10-30 22:52:36 +0100
committerbeorn7 <beorn@grafana.com>2019-10-30 22:52:36 +0100
commitc6914477f511fad69f6121c0abf25cdf035ae374 (patch)
tree98d993d05fa491d4a34513083ad1bcbfbacfffe9 /docs
parentf9d2bbe85476395c764188a445997960e4dae718 (diff)
downloadprometheus_node_collector-c6914477f511fad69f6121c0abf25cdf035ae374.tar.bz2
prometheus_node_collector-c6914477f511fad69f6121c0abf25cdf035ae374.tar.xz
prometheus_node_collector-c6914477f511fad69f6121c0abf25cdf035ae374.zip
Fix the normalization for the cluster-wide dashboards
We actually have to count or sum, respectively, _all_ the selected metrics for the cluster-wide view, which means it's easiest to use the `scalar` approach after all (but only in the cluster dashboard). This still propagates all the labels. I have extended the comment for the `nodeExporterSelector` to note that the cluster dashboard only makes sense if all the selected node exporters actually belong to the same cluster. Since this is jsonnet, users can easily disable the cluster dashboard, or even create multiple instances of the dashboards with different `nodeExporterSelector`s for different clusters. Signed-off-by: beorn7 <beorn@grafana.com>
Diffstat (limited to 'docs')
-rw-r--r-- docs/node-mixin/config.libsonnet 7
-rw-r--r-- docs/node-mixin/dashboards/use.libsonnet 48
2 files changed, 20 insertions, 35 deletions
diff --git a/docs/node-mixin/config.libsonnet b/docs/node-mixin/config.libsonnet
index b25c393..fdea71d 100644
--- a/docs/node-mixin/config.libsonnet
+++ b/docs/node-mixin/config.libsonnet
@@ -2,7 +2,12 @@
2 _config+:: { 2 _config+:: {
3 // Selectors are inserted between {} in Prometheus queries. 3 // Selectors are inserted between {} in Prometheus queries.
4 4
5 // Select the metrics coming from the node exporter. 5 // Select the metrics coming from the node exporter. Note that all
6 // the selected metrics are shown stacked on top of each other in
7 // the 'USE Method / Cluster' dashboard. Consider disabling that
8 // dashboard if mixing up all those metrics in the same dashboard
9 // doesn't make sense (e.g. because they are coming from different
10 // clusters).
6 nodeExporterSelector: 'job="node"', 11 nodeExporterSelector: 'job="node"',
7 12
8 // Select the fstype for filesystem-related queries. If left 13 // Select the fstype for filesystem-related queries. If left
diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet
index d2a568f..3d2e16d 100644
--- a/docs/node-mixin/dashboards/use.libsonnet
+++ b/docs/node-mixin/dashboards/use.libsonnet
@@ -15,9 +15,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
15 instance:node_cpu_utilisation:rate1m{%(nodeExporterSelector)s} 15 instance:node_cpu_utilisation:rate1m{%(nodeExporterSelector)s}
16 * 16 *
17 instance:node_num_cpu:sum{%(nodeExporterSelector)s} 17 instance:node_num_cpu:sum{%(nodeExporterSelector)s}
18 / ignoring (instance) group_left
19 sum without (instance) (instance:node_num_cpu:sum{%(nodeExporterSelector)s})
20 ) 18 )
19 / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s}))
21 ||| % $._config, '{{instance}}', legendLink) + 20 ||| % $._config, '{{instance}}', legendLink) +
22 g.stack + 21 g.stack +
23 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, 22 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@@ -27,11 +26,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
27 // average relates to the "CPU saturation" in the title. 26 // average relates to the "CPU saturation" in the title.
28 g.panel('CPU Saturation (load1 per CPU)') + 27 g.panel('CPU Saturation (load1 per CPU)') +
29 g.queryPanel(||| 28 g.queryPanel(|||
30 ( 29 instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}
31 instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s} 30 / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}))
32 / ignoring (instance) group_left
33 count without (instance) (instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s})
34 )
35 ||| % $._config, '{{instance}}', legendLink) + 31 ||| % $._config, '{{instance}}', legendLink) +
36 g.stack + 32 g.stack +
37 // TODO: Does `max: 1` make sense? The stack can go over 1 in high-load scenarios. 33 // TODO: Does `max: 1` make sense? The stack can go over 1 in high-load scenarios.
@@ -43,11 +39,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
43 .addPanel( 39 .addPanel(
44 g.panel('Memory Utilisation') + 40 g.panel('Memory Utilisation') +
45 g.queryPanel(||| 41 g.queryPanel(|||
46 ( 42 instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}
47 instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s} 43 / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}))
48 / ignoring (instance) group_left
49 count without (instance) (instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s})
50 )
51 ||| % $._config, '{{instance}}', legendLink) + 44 ||| % $._config, '{{instance}}', legendLink) +
52 g.stack + 45 g.stack +
53 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, 46 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@@ -123,11 +116,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
123 // TODO: Does the partition by device make sense? Using the most utilized device per 116 // TODO: Does the partition by device make sense? Using the most utilized device per
124 // instance might make more sense. 117 // instance might make more sense.
125 g.queryPanel(||| 118 g.queryPanel(|||
126 ( 119 instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s}
127 instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s} 120 / scalar(count(instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s}))
128 / ignoring (instance, device) group_left
129 count without (instance, device) (instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s})
130 )
131 ||| % $._config, '{{instance}} {{device}}', legendLink) + 121 ||| % $._config, '{{instance}} {{device}}', legendLink) +
132 g.stack + 122 g.stack +
133 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, 123 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@@ -135,11 +125,8 @@ local g = import 'grafana-builder/grafana.libsonnet';
135 .addPanel( 125 .addPanel(
136 g.panel('Disk IO Saturation') + 126 g.panel('Disk IO Saturation') +
137 g.queryPanel(||| 127 g.queryPanel(|||
138 ( 128 instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s}
139 instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s} 129 / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s}))
140 / ignoring (instance, device) group_left
141 count without (instance, device) (instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s})
142 )
143 ||| % $._config, '{{instance}} {{device}}', legendLink) + 130 ||| % $._config, '{{instance}} {{device}}', legendLink) +
144 g.stack + 131 g.stack +
145 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, 132 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@@ -150,19 +137,12 @@ local g = import 'grafana-builder/grafana.libsonnet';
150 .addPanel( 137 .addPanel(
151 g.panel('Disk Space Utilisation') + 138 g.panel('Disk Space Utilisation') +
152 g.queryPanel(||| 139 g.queryPanel(|||
153 ( 140 sum without (device) (
154 sum without (device) ( 141 max without (fstype, mountpoint) (
155 max without (fstype, mountpoint) ( 142 node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s}
156 node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s}
157 )
158 )
159 / ignoring (instance) group_left
160 sum without (instance, device) (
161 max without (fstype, mountpoint) (
162 node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s}
163 )
164 ) 143 )
165 ) 144 )
145 / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s})))
166 ||| % $._config, '{{instance}}', legendLink) + 146 ||| % $._config, '{{instance}}', legendLink) +
167 g.stack + 147 g.stack +
168 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, 148 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },