diff options
author | beorn7 <beorn@grafana.com> | 2019-08-13 21:54:28 +0200 |
---|---|---|
committer | beorn7 <beorn@grafana.com> | 2019-08-13 21:54:28 +0200 |
commit | f350aaf87e9f484dcb774b9a8b7a2d50a8af7d96 (patch) | |
tree | 3b6bace2e7ee1cdfed221db9e7890e5d5345018d /docs | |
parent | 697c2deed59b414e73197e537192ef320533ceb3 (diff) | |
download | prometheus_node_collector-f350aaf87e9f484dcb774b9a8b7a2d50a8af7d96.tar.bz2 prometheus_node_collector-f350aaf87e9f484dcb774b9a8b7a2d50a8af7d96.tar.xz prometheus_node_collector-f350aaf87e9f484dcb774b9a8b7a2d50a8af7d96.zip |
node-mixin: Fix various straightforward issues in the USE dashboards
- Normalize cluster memory utilisation.
- Fix missing `1m` in memory saturation.
- Have both disk-related rows next to each other instead of with the
network row in between.
- Correctly render transmit network traffic as negative, using
`seriesOverrides` and `min: null` for the y-axis.
- Make panel and row naming consistent.
- Remove legend where it would just display a single entry with
exactly the title of the panel.
- Fix metric name in individual node CPU Saturation panel.
- Break up disk space utilisation by device in the panel for an
individual node.
NB: None of this touches the more subtle issues captured in the
various TODOs.
Signed-off-by: beorn7 <beorn@grafana.com>
Diffstat (limited to 'docs')
-rw-r--r-- | docs/node-mixin/dashboards/use.libsonnet | 192 |
1 files changed, 131 insertions, 61 deletions
diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index b74adef..1a75daa 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet | |||
@@ -42,19 +42,80 @@ local g = import 'grafana-builder/grafana.libsonnet'; | |||
42 | g.row('Memory') | 42 | g.row('Memory') |
43 | .addPanel( | 43 | .addPanel( |
44 | g.panel('Memory Utilisation') + | 44 | g.panel('Memory Utilisation') + |
45 | g.queryPanel('instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) + | 45 | g.queryPanel(||| |
46 | ( | ||
47 | instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s} | ||
48 | / ignoring (instance) group_left | ||
49 | count without (instance) (instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}) | ||
50 | ) | ||
51 | ||| % $._config, '{{instance}}', legendLink) + | ||
46 | g.stack + | 52 | g.stack + |
47 | { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, | 53 | { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, |
48 | ) | 54 | ) |
49 | .addPanel( | 55 | .addPanel( |
50 | g.panel('Memory Saturation (Swapped Pages)') + | 56 | g.panel('Memory Saturation (Swapped Pages)') + |
51 | g.queryPanel('instance:node_memory_swap_io_pages:rate{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) + | 57 | g.queryPanel('instance:node_memory_swap_io_pages:rate1m{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) + |
52 | g.stack + | 58 | g.stack + |
53 | { yaxes: g.yaxes('rps') }, | 59 | { yaxes: g.yaxes('rps') }, |
54 | ) | 60 | ) |
55 | ) | 61 | ) |
56 | .addRow( | 62 | .addRow( |
57 | g.row('Disk') | 63 | g.row('Network') |
64 | .addPanel( | ||
65 | g.panel('Net Utilisation (Bytes Receive/Transmit)') + | ||
66 | g.queryPanel( | ||
67 | [ | ||
68 | 'instance:node_network_receive_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config, | ||
69 | 'instance:node_network_transmit_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config, | ||
70 | ], | ||
71 | ['{{instance}} Receive', '{{instance}} Transmit'], | ||
72 | legendLink, | ||
73 | ) + | ||
74 | g.stack + | ||
75 | { | ||
76 | yaxes: g.yaxes({ format: 'Bps', min: null }), | ||
77 | seriesOverrides: [ | ||
78 | { | ||
79 | alias: '/ Receive/', | ||
80 | stack: 'A', | ||
81 | }, | ||
82 | { | ||
83 | alias: '/ Transmit/', | ||
84 | stack: 'B', | ||
85 | transform: 'negative-Y', | ||
86 | }, | ||
87 | ], | ||
88 | }, | ||
89 | ) | ||
90 | .addPanel( | ||
91 | g.panel('Net Saturation (Drops Receive/Transmit)') + | ||
92 | g.queryPanel( | ||
93 | [ | ||
94 | 'instance:node_network_receive_drop_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config, | ||
95 | 'instance:node_network_transmit_drop_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config, | ||
96 | ], | ||
97 | ['{{instance}} Receive', '{{instance}} Transmit'], | ||
98 | legendLink, | ||
99 | ) + | ||
100 | g.stack + | ||
101 | { | ||
102 | yaxes: g.yaxes({ format: 'rps', min: null }), | ||
103 | seriesOverrides: [ | ||
104 | { | ||
105 | alias: '/ Receive/', | ||
106 | stack: 'A', | ||
107 | }, | ||
108 | { | ||
109 | alias: '/ Transmit/', | ||
110 | stack: 'B', | ||
111 | transform: 'negative-Y', | ||
112 | }, | ||
113 | ], | ||
114 | }, | ||
115 | ) | ||
116 | ) | ||
117 | .addRow( | ||
118 | g.row('Disk IO') | ||
58 | .addPanel( | 119 | .addPanel( |
59 | g.panel('Disk IO Utilisation') + | 120 | g.panel('Disk IO Utilisation') + |
60 | // Full utilisation would be all disks on each node spending an average of | 121 | // Full utilisation would be all disks on each node spending an average of |
@@ -85,36 +146,7 @@ local g = import 'grafana-builder/grafana.libsonnet'; | |||
85 | ) | 146 | ) |
86 | ) | 147 | ) |
87 | .addRow( | 148 | .addRow( |
88 | g.row('Network') | 149 | g.row('Disk Space') |
89 | .addPanel( | ||
90 | g.panel('Net Utilisation (Bytes Receive/Transmit)') + | ||
91 | g.queryPanel( | ||
92 | [ | ||
93 | 'instance:node_network_receive_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config, | ||
94 | '-instance:node_network_transmit_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config, | ||
95 | ], | ||
96 | ['{{instance}} Receive', '{{instance}} Transmit'], | ||
97 | legendLink, | ||
98 | ) + | ||
99 | g.stack + | ||
100 | { yaxes: g.yaxes('Bps') }, | ||
101 | ) | ||
102 | .addPanel( | ||
103 | g.panel('Net Saturation (Drops Receive/Transmit)') + | ||
104 | g.queryPanel( | ||
105 | [ | ||
106 | 'instance:node_network_receive_drop_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config, | ||
107 | '-instance:node_network_transmit_drop_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config, | ||
108 | ], | ||
109 | ['{{instance}} Receive', '{{instance}} Transmit'], | ||
110 | legendLink, | ||
111 | ) + | ||
112 | g.stack + | ||
113 | { yaxes: g.yaxes('rps') }, | ||
114 | ) | ||
115 | ) | ||
116 | .addRow( | ||
117 | g.row('Storage') | ||
118 | .addPanel( | 150 | .addPanel( |
119 | g.panel('Disk Space Utilisation') + | 151 | g.panel('Disk Space Utilisation') + |
120 | g.queryPanel(||| | 152 | g.queryPanel(||| |
@@ -145,14 +177,20 @@ local g = import 'grafana-builder/grafana.libsonnet'; | |||
145 | .addPanel( | 177 | .addPanel( |
146 | g.panel('CPU Utilisation') + | 178 | g.panel('CPU Utilisation') + |
147 | g.queryPanel('instance:node_cpu_utilisation:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') + | 179 | g.queryPanel('instance:node_cpu_utilisation:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') + |
148 | { yaxes: g.yaxes('percentunit') }, | 180 | { |
181 | yaxes: g.yaxes('percentunit'), | ||
182 | legend+: { show: false }, | ||
183 | }, | ||
149 | ) | 184 | ) |
150 | .addPanel( | 185 | .addPanel( |
151 | // TODO: Is this a useful panel? At least there should be some explanation how load | 186 | // TODO: Is this a useful panel? At least there should be some explanation how load |
152 | // average relates to the "CPU saturation" in the title. | 187 | // average relates to the "CPU saturation" in the title. |
153 | g.panel('CPU Saturation (Load1)') + | 188 | g.panel('CPU Saturation (Load1 per CPU)') + |
154 | g.queryPanel('instance:node_cpu_saturation_load1:{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Saturation') + | 189 | g.queryPanel('instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Saturation') + |
155 | { yaxes: g.yaxes('percentunit') }, | 190 | { |
191 | yaxes: g.yaxes('percentunit'), | ||
192 | legend+: { show: false }, | ||
193 | }, | ||
156 | ) | 194 | ) |
157 | ) | 195 | ) |
158 | .addRow( | 196 | .addRow( |
@@ -165,20 +203,10 @@ local g = import 'grafana-builder/grafana.libsonnet'; | |||
165 | .addPanel( | 203 | .addPanel( |
166 | g.panel('Memory Saturation (pages swapped per second)') + | 204 | g.panel('Memory Saturation (pages swapped per second)') + |
167 | g.queryPanel('instance:node_memory_swap_io_pages:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Swap IO') + | 205 | g.queryPanel('instance:node_memory_swap_io_pages:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Swap IO') + |
168 | { yaxes: g.yaxes('short') }, | 206 | { |
169 | ) | 207 | yaxes: g.yaxes('short'), |
170 | ) | 208 | legend+: { show: false }, |
171 | .addRow( | 209 | }, |
172 | g.row('Disk') | ||
173 | .addPanel( | ||
174 | g.panel('Disk IO Utilisation') + | ||
175 | g.queryPanel('instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation {{device}}') + | ||
176 | { yaxes: g.yaxes('percentunit') }, | ||
177 | ) | ||
178 | .addPanel( | ||
179 | g.panel('Disk IO Saturation') + | ||
180 | g.queryPanel('instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Saturation {{device}}') + | ||
181 | { yaxes: g.yaxes('percentunit') }, | ||
182 | ) | 210 | ) |
183 | ) | 211 | ) |
184 | .addRow( | 212 | .addRow( |
@@ -188,37 +216,79 @@ local g = import 'grafana-builder/grafana.libsonnet'; | |||
188 | g.queryPanel( | 216 | g.queryPanel( |
189 | [ | 217 | [ |
190 | 'instance:node_network_receive_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, | 218 | 'instance:node_network_receive_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, |
191 | '-instance:node_network_transmit_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, | 219 | 'instance:node_network_transmit_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, |
192 | ], | 220 | ], |
193 | ['Receive', 'Transmit'], | 221 | ['Receive', 'Transmit'], |
194 | ) + | 222 | ) + |
195 | { yaxes: g.yaxes('Bps') }, | 223 | { |
224 | yaxes: g.yaxes({ format: 'Bps', min: null }), | ||
225 | seriesOverrides: [ | ||
226 | { | ||
227 | alias: '/Receive/', | ||
228 | stack: 'A', | ||
229 | }, | ||
230 | { | ||
231 | alias: '/Transmit/', | ||
232 | stack: 'B', | ||
233 | transform: 'negative-Y', | ||
234 | }, | ||
235 | ], | ||
236 | }, | ||
196 | ) | 237 | ) |
197 | .addPanel( | 238 | .addPanel( |
198 | g.panel('Net Saturation (Drops Receive/Transmit)') + | 239 | g.panel('Net Saturation (Drops Receive/Transmit)') + |
199 | g.queryPanel( | 240 | g.queryPanel( |
200 | [ | 241 | [ |
201 | 'instance:node_network_receive_drop_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, | 242 | 'instance:node_network_receive_drop_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, |
202 | '-instance:node_network_transmit_drop_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, | 243 | 'instance:node_network_transmit_drop_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, |
203 | ], | 244 | ], |
204 | ['Receive drops', 'Transmit drops'], | 245 | ['Receive drops', 'Transmit drops'], |
205 | ) + | 246 | ) + |
206 | { yaxes: g.yaxes('rps') }, | 247 | { |
248 | yaxes: g.yaxes({ format: 'rps', min: null }), | ||
249 | seriesOverrides: [ | ||
250 | { | ||
251 | alias: '/Receive/', | ||
252 | stack: 'A', | ||
253 | }, | ||
254 | { | ||
255 | alias: '/Transmit/', | ||
256 | stack: 'B', | ||
257 | transform: 'negative-Y', | ||
258 | }, | ||
259 | ], | ||
260 | }, | ||
207 | ) | 261 | ) |
208 | ) | 262 | ) |
209 | .addRow( | 263 | .addRow( |
210 | g.row('Disk') | 264 | g.row('Disk IO') |
211 | .addPanel( | 265 | .addPanel( |
212 | g.panel('Disk Utilisation') + | 266 | g.panel('Disk IO Utilisation') + |
267 | g.queryPanel('instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') + | ||
268 | { yaxes: g.yaxes('percentunit') }, | ||
269 | ) | ||
270 | .addPanel( | ||
271 | g.panel('Disk IO Saturation') + | ||
272 | g.queryPanel('instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') + | ||
273 | { yaxes: g.yaxes('percentunit') }, | ||
274 | ) | ||
275 | ) | ||
276 | .addRow( | ||
277 | g.row('Disk Space') | ||
278 | .addPanel( | ||
279 | g.panel('Disk Space Utilisation') + | ||
213 | g.queryPanel(||| | 280 | g.queryPanel(||| |
214 | 1 - | 281 | 1 - |
215 | ( | 282 | ( |
216 | sum(max without (mountpoint, fstype) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s})) | 283 | max without (mountpoint, fstype) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, instance="$instance"}}) |
217 | / | 284 | / |
218 | sum(max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s})) | 285 | max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, instance="$instance"}}) |
219 | ) | 286 | ) |
220 | ||| % $._config, 'Disk') + | 287 | ||| % $._config, '{{device}}') + |
221 | { yaxes: g.yaxes('percentunit') }, | 288 | { |
289 | yaxes: g.yaxes('percentunit'), | ||
290 | legend+: { show: false }, | ||
291 | }, | ||
222 | ), | 292 | ), |
223 | ), | 293 | ), |
224 | }, | 294 | }, |