aboutsummaryrefslogtreecommitdiff
path: root/docs
diff options
context:
space:
mode:
author: beorn7 <beorn@grafana.com> 2019-08-13 21:54:28 +0200
committer: beorn7 <beorn@grafana.com> 2019-08-13 21:54:28 +0200
commit: f350aaf87e9f484dcb774b9a8b7a2d50a8af7d96 (patch)
tree: 3b6bace2e7ee1cdfed221db9e7890e5d5345018d /docs
parent: 697c2deed59b414e73197e537192ef320533ceb3 (diff)
downloadprometheus_node_collector-f350aaf87e9f484dcb774b9a8b7a2d50a8af7d96.tar.bz2
prometheus_node_collector-f350aaf87e9f484dcb774b9a8b7a2d50a8af7d96.tar.xz
prometheus_node_collector-f350aaf87e9f484dcb774b9a8b7a2d50a8af7d96.zip
node-mixin: Fix various straightforward issues in the USE dashboards
- Normalize cluster memory utilisation. - Fix missing `1m` in memory saturation. - Have both disk-related rows next to each other instead of with the network row in between. - Correctly render transmit network traffic as negative, using `seriesOverrides` and `min: null` for the y-axis. - Make panel and row naming consistent. - Remove the legend where it would just display a single entry with exactly the title of the panel. - Fix metric name in individual node CPU Saturation panel. - Break up disk space utilisation by device in the panel for an individual node. NB: None of this touches the more subtle issues captured in the various TODOs. Signed-off-by: beorn7 <beorn@grafana.com>
Diffstat (limited to 'docs')
-rw-r--r-- docs/node-mixin/dashboards/use.libsonnet 192
1 files changed, 131 insertions, 61 deletions
diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet
index b74adef..1a75daa 100644
--- a/docs/node-mixin/dashboards/use.libsonnet
+++ b/docs/node-mixin/dashboards/use.libsonnet
@@ -42,19 +42,80 @@ local g = import 'grafana-builder/grafana.libsonnet';
42 g.row('Memory') 42 g.row('Memory')
43 .addPanel( 43 .addPanel(
44 g.panel('Memory Utilisation') + 44 g.panel('Memory Utilisation') +
45 g.queryPanel('instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) + 45 g.queryPanel(|||
46 (
47 instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}
48 / ignoring (instance) group_left
49 count without (instance) (instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s})
50 )
51 ||| % $._config, '{{instance}}', legendLink) +
46 g.stack + 52 g.stack +
47 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, 53 { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
48 ) 54 )
49 .addPanel( 55 .addPanel(
50 g.panel('Memory Saturation (Swapped Pages)') + 56 g.panel('Memory Saturation (Swapped Pages)') +
51 g.queryPanel('instance:node_memory_swap_io_pages:rate{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) + 57 g.queryPanel('instance:node_memory_swap_io_pages:rate1m{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) +
52 g.stack + 58 g.stack +
53 { yaxes: g.yaxes('rps') }, 59 { yaxes: g.yaxes('rps') },
54 ) 60 )
55 ) 61 )
56 .addRow( 62 .addRow(
57 g.row('Disk') 63 g.row('Network')
64 .addPanel(
65 g.panel('Net Utilisation (Bytes Receive/Transmit)') +
66 g.queryPanel(
67 [
68 'instance:node_network_receive_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config,
69 'instance:node_network_transmit_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config,
70 ],
71 ['{{instance}} Receive', '{{instance}} Transmit'],
72 legendLink,
73 ) +
74 g.stack +
75 {
76 yaxes: g.yaxes({ format: 'Bps', min: null }),
77 seriesOverrides: [
78 {
79 alias: '/ Receive/',
80 stack: 'A',
81 },
82 {
83 alias: '/ Transmit/',
84 stack: 'B',
85 transform: 'negative-Y',
86 },
87 ],
88 },
89 )
90 .addPanel(
91 g.panel('Net Saturation (Drops Receive/Transmit)') +
92 g.queryPanel(
93 [
94 'instance:node_network_receive_drop_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config,
95 'instance:node_network_transmit_drop_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config,
96 ],
97 ['{{instance}} Receive', '{{instance}} Transmit'],
98 legendLink,
99 ) +
100 g.stack +
101 {
102 yaxes: g.yaxes({ format: 'rps', min: null }),
103 seriesOverrides: [
104 {
105 alias: '/ Receive/',
106 stack: 'A',
107 },
108 {
109 alias: '/ Transmit/',
110 stack: 'B',
111 transform: 'negative-Y',
112 },
113 ],
114 },
115 )
116 )
117 .addRow(
118 g.row('Disk IO')
58 .addPanel( 119 .addPanel(
59 g.panel('Disk IO Utilisation') + 120 g.panel('Disk IO Utilisation') +
60 // Full utilisation would be all disks on each node spending an average of 121 // Full utilisation would be all disks on each node spending an average of
@@ -85,36 +146,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
85 ) 146 )
86 ) 147 )
87 .addRow( 148 .addRow(
88 g.row('Network') 149 g.row('Disk Space')
89 .addPanel(
90 g.panel('Net Utilisation (Bytes Receive/Transmit)') +
91 g.queryPanel(
92 [
93 'instance:node_network_receive_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config,
94 '-instance:node_network_transmit_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config,
95 ],
96 ['{{instance}} Receive', '{{instance}} Transmit'],
97 legendLink,
98 ) +
99 g.stack +
100 { yaxes: g.yaxes('Bps') },
101 )
102 .addPanel(
103 g.panel('Net Saturation (Drops Receive/Transmit)') +
104 g.queryPanel(
105 [
106 'instance:node_network_receive_drop_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config,
107 '-instance:node_network_transmit_drop_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config,
108 ],
109 ['{{instance}} Receive', '{{instance}} Transmit'],
110 legendLink,
111 ) +
112 g.stack +
113 { yaxes: g.yaxes('rps') },
114 )
115 )
116 .addRow(
117 g.row('Storage')
118 .addPanel( 150 .addPanel(
119 g.panel('Disk Space Utilisation') + 151 g.panel('Disk Space Utilisation') +
120 g.queryPanel(||| 152 g.queryPanel(|||
@@ -145,14 +177,20 @@ local g = import 'grafana-builder/grafana.libsonnet';
145 .addPanel( 177 .addPanel(
146 g.panel('CPU Utilisation') + 178 g.panel('CPU Utilisation') +
147 g.queryPanel('instance:node_cpu_utilisation:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') + 179 g.queryPanel('instance:node_cpu_utilisation:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') +
148 { yaxes: g.yaxes('percentunit') }, 180 {
181 yaxes: g.yaxes('percentunit'),
182 legend+: { show: false },
183 },
149 ) 184 )
150 .addPanel( 185 .addPanel(
151 // TODO: Is this a useful panel? At least there should be some explanation how load 186 // TODO: Is this a useful panel? At least there should be some explanation how load
152 // average relates to the "CPU saturation" in the title. 187 // average relates to the "CPU saturation" in the title.
153 g.panel('CPU Saturation (Load1)') + 188 g.panel('CPU Saturation (Load1 per CPU)') +
154 g.queryPanel('instance:node_cpu_saturation_load1:{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Saturation') + 189 g.queryPanel('instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Saturation') +
155 { yaxes: g.yaxes('percentunit') }, 190 {
191 yaxes: g.yaxes('percentunit'),
192 legend+: { show: false },
193 },
156 ) 194 )
157 ) 195 )
158 .addRow( 196 .addRow(
@@ -165,20 +203,10 @@ local g = import 'grafana-builder/grafana.libsonnet';
165 .addPanel( 203 .addPanel(
166 g.panel('Memory Saturation (pages swapped per second)') + 204 g.panel('Memory Saturation (pages swapped per second)') +
167 g.queryPanel('instance:node_memory_swap_io_pages:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Swap IO') + 205 g.queryPanel('instance:node_memory_swap_io_pages:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Swap IO') +
168 { yaxes: g.yaxes('short') }, 206 {
169 ) 207 yaxes: g.yaxes('short'),
170 ) 208 legend+: { show: false },
171 .addRow( 209 },
172 g.row('Disk')
173 .addPanel(
174 g.panel('Disk IO Utilisation') +
175 g.queryPanel('instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation {{device}}') +
176 { yaxes: g.yaxes('percentunit') },
177 )
178 .addPanel(
179 g.panel('Disk IO Saturation') +
180 g.queryPanel('instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Saturation {{device}}') +
181 { yaxes: g.yaxes('percentunit') },
182 ) 210 )
183 ) 211 )
184 .addRow( 212 .addRow(
@@ -188,37 +216,79 @@ local g = import 'grafana-builder/grafana.libsonnet';
188 g.queryPanel( 216 g.queryPanel(
189 [ 217 [
190 'instance:node_network_receive_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 218 'instance:node_network_receive_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
191 '-instance:node_network_transmit_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 219 'instance:node_network_transmit_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
192 ], 220 ],
193 ['Receive', 'Transmit'], 221 ['Receive', 'Transmit'],
194 ) + 222 ) +
195 { yaxes: g.yaxes('Bps') }, 223 {
224 yaxes: g.yaxes({ format: 'Bps', min: null }),
225 seriesOverrides: [
226 {
227 alias: '/Receive/',
228 stack: 'A',
229 },
230 {
231 alias: '/Transmit/',
232 stack: 'B',
233 transform: 'negative-Y',
234 },
235 ],
236 },
196 ) 237 )
197 .addPanel( 238 .addPanel(
198 g.panel('Net Saturation (Drops Receive/Transmit)') + 239 g.panel('Net Saturation (Drops Receive/Transmit)') +
199 g.queryPanel( 240 g.queryPanel(
200 [ 241 [
201 'instance:node_network_receive_drop_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 242 'instance:node_network_receive_drop_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
202 '-instance:node_network_transmit_drop_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 243 'instance:node_network_transmit_drop_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
203 ], 244 ],
204 ['Receive drops', 'Transmit drops'], 245 ['Receive drops', 'Transmit drops'],
205 ) + 246 ) +
206 { yaxes: g.yaxes('rps') }, 247 {
248 yaxes: g.yaxes({ format: 'rps', min: null }),
249 seriesOverrides: [
250 {
251 alias: '/Receive/',
252 stack: 'A',
253 },
254 {
255 alias: '/Transmit/',
256 stack: 'B',
257 transform: 'negative-Y',
258 },
259 ],
260 },
207 ) 261 )
208 ) 262 )
209 .addRow( 263 .addRow(
210 g.row('Disk') 264 g.row('Disk IO')
211 .addPanel( 265 .addPanel(
212 g.panel('Disk Utilisation') + 266 g.panel('Disk IO Utilisation') +
267 g.queryPanel('instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') +
268 { yaxes: g.yaxes('percentunit') },
269 )
270 .addPanel(
271 g.panel('Disk IO Saturation') +
272 g.queryPanel('instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') +
273 { yaxes: g.yaxes('percentunit') },
274 )
275 )
276 .addRow(
277 g.row('Disk Space')
278 .addPanel(
279 g.panel('Disk Space Utilisation') +
213 g.queryPanel(||| 280 g.queryPanel(|||
214 1 - 281 1 -
215 ( 282 (
216 sum(max without (mountpoint, fstype) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s})) 283 max without (mountpoint, fstype) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, instance="$instance"}})
217 / 284 /
218 sum(max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s})) 285 max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, instance="$instance"}})
219 ) 286 )
220 ||| % $._config, 'Disk') + 287 ||| % $._config, '{{device}}') +
221 { yaxes: g.yaxes('percentunit') }, 288 {
289 yaxes: g.yaxes('percentunit'),
290 legend+: { show: false },
291 },
222 ), 292 ),
223 ), 293 ),
224 }, 294 },