diff options
author | Björn Rabenstein <beorn@grafana.com> | 2019-08-15 00:40:51 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-08-15 00:40:51 +0200 |
commit | 7ef6f2576d5bef2fabb9e033a92da4d5a835915b (patch) | |
tree | e2e912538a89ef9077fcb6d3b8d3b55a28239599 /docs | |
parent | 0d3a2d32091fede144f1b2669899230ce23b555b (diff) | |
download | prometheus_node_collector-7ef6f2576d5bef2fabb9e033a92da4d5a835915b.tar.bz2 prometheus_node_collector-7ef6f2576d5bef2fabb9e033a92da4d5a835915b.tar.xz prometheus_node_collector-7ef6f2576d5bef2fabb9e033a92da4d5a835915b.zip |
node-mixin: Improve nodes dashboard (#1448)
* node-mixin: Improve nodes dashboard
- Use stacking where it makes sense.
- Normalize idle CPU so that stacking is more meaningful.
- Consistently fill where stacking is used but don't fill where not.
- Fix y axis max value for Idle CPU panel.
- Fix y axis min value for memory usage panel.
- Use `$__interval` for range where applicable (and set min step
to 1m).
- Make the right Y axis for disk I/O actually work.
This is just an incremental improvement. It doesn't touch the more
involved TODOs.
Signed-off-by: beorn7 <beorn@grafana.com>
Diffstat (limited to 'docs')
-rw-r--r-- | docs/node-mixin/dashboards/node.libsonnet | 57 |
1 files changed, 42 insertions, 15 deletions
diff --git a/docs/node-mixin/dashboards/node.libsonnet b/docs/node-mixin/dashboards/node.libsonnet index 5980f8c..8e279c8 100644 --- a/docs/node-mixin/dashboards/node.libsonnet +++ b/docs/node-mixin/dashboards/node.libsonnet | |||
@@ -16,25 +16,30 @@ local gauge = promgrafonnet.gauge; | |||
16 | datasource='$datasource', | 16 | datasource='$datasource', |
17 | span=6, | 17 | span=6, |
18 | format='percentunit', | 18 | format='percentunit', |
19 | max=100, | 19 | max=1, |
20 | min=0, | 20 | min=0, |
21 | stack=true, | ||
21 | ) | 22 | ) |
22 | .addTarget(prometheus.target( | 23 | .addTarget(prometheus.target( |
23 | // TODO: Consider using `${__interval}` as range and a 1m min step. | ||
24 | ||| | 24 | ||| |
25 | 1 - rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"}[1m]) | 25 | ( |
26 | (1 - rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"}[$__interval])) | ||
27 | / ignoring(cpu) group_left | ||
28 | count without (cpu)( node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"}) | ||
29 | ) | ||
26 | ||| % $._config, | 30 | ||| % $._config, |
27 | legendFormat='{{cpu}}', | 31 | legendFormat='{{cpu}}', |
28 | intervalFactor=10, | 32 | intervalFactor=5, |
33 | interval='1m', | ||
29 | )); | 34 | )); |
30 | 35 | ||
31 | // TODO: Is this panel useful? | ||
32 | local systemLoad = | 36 | local systemLoad = |
33 | graphPanel.new( | 37 | graphPanel.new( |
34 | 'Load Average', | 38 | 'Load Average', |
35 | datasource='$datasource', | 39 | datasource='$datasource', |
36 | span=6, | 40 | span=6, |
37 | format='short', | 41 | format='short', |
42 | fill=0, | ||
38 | ) | 43 | ) |
39 | .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='1m load average')) | 44 | .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='1m load average')) |
40 | .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='5m load average')) | 45 | .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='5m load average')) |
@@ -46,6 +51,8 @@ local gauge = promgrafonnet.gauge; | |||
46 | datasource='$datasource', | 51 | datasource='$datasource', |
47 | span=9, | 52 | span=9, |
48 | format='bytes', | 53 | format='bytes', |
54 | stack=true, | ||
55 | min=0, | ||
49 | ) | 56 | ) |
50 | .addTarget(prometheus.target( | 57 | .addTarget(prometheus.target( |
51 | ||| | 58 | ||| |
@@ -84,20 +91,32 @@ local gauge = promgrafonnet.gauge; | |||
84 | 'Disk I/O', | 91 | 'Disk I/O', |
85 | datasource='$datasource', | 92 | datasource='$datasource', |
86 | span=9, | 93 | span=9, |
94 | fill=0, | ||
87 | ) | 95 | ) |
88 | // TODO: Does it make sense to have those three in the same panel? | 96 | // TODO: Does it make sense to have those three in the same panel? |
89 | // TODO: Consider using `${__interval}` as range and a 1m min step. | 97 | .addTarget(prometheus.target( |
90 | .addTarget(prometheus.target('rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[1m])' % $._config, legendFormat='{{device}} read')) | 98 | 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config, |
91 | .addTarget(prometheus.target('rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[1m])' % $._config, legendFormat='{{device}} written')) | 99 | legendFormat='{{device}} read', |
92 | .addTarget(prometheus.target('rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[1m])' % $._config, legendFormat='{{device}} io time')) + | 100 | interval='1m', |
101 | )) | ||
102 | .addTarget(prometheus.target( | ||
103 | 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config, | ||
104 | legendFormat='{{device}} written', | ||
105 | interval='1m', | ||
106 | )) | ||
107 | .addTarget(prometheus.target( | ||
108 | 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config, | ||
109 | legendFormat='{{device}} io time', | ||
110 | interval='1m', | ||
111 | )) + | ||
93 | { | 112 | { |
94 | seriesOverrides: [ | 113 | seriesOverrides: [ |
95 | { | 114 | { |
96 | alias: 'read', | 115 | alias: '/ read| written/', |
97 | yaxis: 1, | 116 | yaxis: 1, |
98 | }, | 117 | }, |
99 | { | 118 | { |
100 | alias: 'io time', | 119 | alias: '/ io time/', |
101 | yaxis: 2, | 120 | yaxis: 2, |
102 | }, | 121 | }, |
103 | ], | 122 | ], |
@@ -129,9 +148,13 @@ local gauge = promgrafonnet.gauge; | |||
129 | datasource='$datasource', | 148 | datasource='$datasource', |
130 | span=6, | 149 | span=6, |
131 | format='bytes', | 150 | format='bytes', |
151 | fill=0, | ||
132 | ) | 152 | ) |
133 | // TODO: Consider using `${__interval}` as range and a 1m min step. | 153 | .addTarget(prometheus.target( |
134 | .addTarget(prometheus.target('rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[1m])' % $._config, legendFormat='{{device}}')); | 154 | 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config, |
155 | legendFormat='{{device}}', | ||
156 | interval='1m', | ||
157 | )); | ||
135 | 158 | ||
136 | local networkTransmitted = | 159 | local networkTransmitted = |
137 | graphPanel.new( | 160 | graphPanel.new( |
@@ -139,9 +162,13 @@ local gauge = promgrafonnet.gauge; | |||
139 | datasource='$datasource', | 162 | datasource='$datasource', |
140 | span=6, | 163 | span=6, |
141 | format='bytes', | 164 | format='bytes', |
165 | fill=0, | ||
142 | ) | 166 | ) |
143 | // TODO: Consider using `${__interval}` as range and a 1m min step. | 167 | .addTarget(prometheus.target( |
144 | .addTarget(prometheus.target('rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[1m])' % $._config, legendFormat='{{device}}')); | 168 | 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config, |
169 | legendFormat='{{device}}', | ||
170 | interval='1m', | ||
171 | )); | ||
145 | 172 | ||
146 | dashboard.new('Nodes', time_from='now-1h') | 173 | dashboard.new('Nodes', time_from='now-1h') |
147 | .addTemplate( | 174 | .addTemplate( |