aboutsummaryrefslogtreecommitdiff
path: root/docs
diff options
context:
space:
mode:
author Björn Rabenstein <beorn@grafana.com> 2019-08-15 00:40:51 +0200
committer GitHub <noreply@github.com> 2019-08-15 00:40:51 +0200
commit 7ef6f2576d5bef2fabb9e033a92da4d5a835915b (patch)
tree e2e912538a89ef9077fcb6d3b8d3b55a28239599 /docs
parent 0d3a2d32091fede144f1b2669899230ce23b555b (diff)
download prometheus_node_collector-7ef6f2576d5bef2fabb9e033a92da4d5a835915b.tar.bz2
prometheus_node_collector-7ef6f2576d5bef2fabb9e033a92da4d5a835915b.tar.xz
prometheus_node_collector-7ef6f2576d5bef2fabb9e033a92da4d5a835915b.zip
node-mixin: Improve nodes dashboard (#1448)
* node-mixin: Improve nodes dashboard - Use stacking where it makes sense. - Normalize idle CPU so that stacking is more meaningful. - Consistently fill where stacking is used but don't fill where not. - Fix y axis max value for Idle CPU panel. - Fix y axis min value for memory usage panel. - Use `$__interval` for range where applicable (and set min step to 1m). - Make the right Y axis for disk I/O actually work. This is just an incremental improvement. It doesn't touch the more involved TODOs. Signed-off-by: beorn7 <beorn@grafana.com>
Diffstat (limited to 'docs')
-rw-r--r-- docs/node-mixin/dashboards/node.libsonnet 57
1 files changed, 42 insertions, 15 deletions
diff --git a/docs/node-mixin/dashboards/node.libsonnet b/docs/node-mixin/dashboards/node.libsonnet
index 5980f8c..8e279c8 100644
--- a/docs/node-mixin/dashboards/node.libsonnet
+++ b/docs/node-mixin/dashboards/node.libsonnet
@@ -16,25 +16,30 @@ local gauge = promgrafonnet.gauge;
16 datasource='$datasource', 16 datasource='$datasource',
17 span=6, 17 span=6,
18 format='percentunit', 18 format='percentunit',
19 max=100, 19 max=1,
20 min=0, 20 min=0,
21 stack=true,
21 ) 22 )
22 .addTarget(prometheus.target( 23 .addTarget(prometheus.target(
23 // TODO: Consider using `${__interval}` as range and a 1m min step.
24 ||| 24 |||
25 1 - rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"}[1m]) 25 (
26 (1 - rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"}[$__interval]))
27 / ignoring(cpu) group_left
28 count without (cpu)( node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance"})
29 )
26 ||| % $._config, 30 ||| % $._config,
27 legendFormat='{{cpu}}', 31 legendFormat='{{cpu}}',
28 intervalFactor=10, 32 intervalFactor=5,
33 interval='1m',
29 )); 34 ));
30 35
31 // TODO: Is this panel useful?
32 local systemLoad = 36 local systemLoad =
33 graphPanel.new( 37 graphPanel.new(
34 'Load Average', 38 'Load Average',
35 datasource='$datasource', 39 datasource='$datasource',
36 span=6, 40 span=6,
37 format='short', 41 format='short',
42 fill=0,
38 ) 43 )
39 .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='1m load average')) 44 .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='1m load average'))
40 .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='5m load average')) 45 .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance"}' % $._config, legendFormat='5m load average'))
@@ -46,6 +51,8 @@ local gauge = promgrafonnet.gauge;
46 datasource='$datasource', 51 datasource='$datasource',
47 span=9, 52 span=9,
48 format='bytes', 53 format='bytes',
54 stack=true,
55 min=0,
49 ) 56 )
50 .addTarget(prometheus.target( 57 .addTarget(prometheus.target(
51 ||| 58 |||
@@ -84,20 +91,32 @@ local gauge = promgrafonnet.gauge;
84 'Disk I/O', 91 'Disk I/O',
85 datasource='$datasource', 92 datasource='$datasource',
86 span=9, 93 span=9,
94 fill=0,
87 ) 95 )
88 // TODO: Does it make sense to have those three in the same panel? 96 // TODO: Does it make sense to have those three in the same panel?
89 // TODO: Consider using `${__interval}` as range and a 1m min step. 97 .addTarget(prometheus.target(
90 .addTarget(prometheus.target('rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[1m])' % $._config, legendFormat='{{device}} read')) 98 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config,
91 .addTarget(prometheus.target('rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[1m])' % $._config, legendFormat='{{device}} written')) 99 legendFormat='{{device}} read',
92 .addTarget(prometheus.target('rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[1m])' % $._config, legendFormat='{{device}} io time')) + 100 interval='1m',
101 ))
102 .addTarget(prometheus.target(
103 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config,
104 legendFormat='{{device}} written',
105 interval='1m',
106 ))
107 .addTarget(prometheus.target(
108 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config,
109 legendFormat='{{device}} io time',
110 interval='1m',
111 )) +
93 { 112 {
94 seriesOverrides: [ 113 seriesOverrides: [
95 { 114 {
96 alias: 'read', 115 alias: '/ read| written/',
97 yaxis: 1, 116 yaxis: 1,
98 }, 117 },
99 { 118 {
100 alias: 'io time', 119 alias: '/ io time/',
101 yaxis: 2, 120 yaxis: 2,
102 }, 121 },
103 ], 122 ],
@@ -129,9 +148,13 @@ local gauge = promgrafonnet.gauge;
129 datasource='$datasource', 148 datasource='$datasource',
130 span=6, 149 span=6,
131 format='bytes', 150 format='bytes',
151 fill=0,
132 ) 152 )
133 // TODO: Consider using `${__interval}` as range and a 1m min step. 153 .addTarget(prometheus.target(
134 .addTarget(prometheus.target('rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[1m])' % $._config, legendFormat='{{device}}')); 154 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config,
155 legendFormat='{{device}}',
156 interval='1m',
157 ));
135 158
136 local networkTransmitted = 159 local networkTransmitted =
137 graphPanel.new( 160 graphPanel.new(
@@ -139,9 +162,13 @@ local gauge = promgrafonnet.gauge;
139 datasource='$datasource', 162 datasource='$datasource',
140 span=6, 163 span=6,
141 format='bytes', 164 format='bytes',
165 fill=0,
142 ) 166 )
143 // TODO: Consider using `${__interval}` as range and a 1m min step. 167 .addTarget(prometheus.target(
144 .addTarget(prometheus.target('rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[1m])' % $._config, legendFormat='{{device}}')); 168 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config,
169 legendFormat='{{device}}',
170 interval='1m',
171 ));
145 172
146 dashboard.new('Nodes', time_from='now-1h') 173 dashboard.new('Nodes', time_from='now-1h')
147 .addTemplate( 174 .addTemplate(