diff options
author | beorn7 <beorn@grafana.com> | 2019-07-22 14:06:27 +0200 |
---|---|---|
committer | beorn7 <beorn@grafana.com> | 2019-07-22 14:06:27 +0200 |
commit | 36dc7451c95dbac064fff72a2929d1c6dc82b187 (patch) | |
tree | 48718f6ef1648e53d8c01cadf6d4864bdebf8025 /docs | |
parent | e01d9f9e78536dbabf76836e355b4202535d690a (diff) | |
download | prometheus_node_collector-36dc7451c95dbac064fff72a2929d1c6dc82b187.tar.bz2 prometheus_node_collector-36dc7451c95dbac064fff72a2929d1c6dc82b187.tar.xz prometheus_node_collector-36dc7451c95dbac064fff72a2929d1c6dc82b187.zip |
Improvement of comments and panel titles
Signed-off-by: beorn7 <beorn@grafana.com>
Diffstat (limited to 'docs')
-rw-r--r-- | docs/node-mixin/dashboards/use.libsonnet | 9 | ||||
-rw-r--r-- | docs/node-mixin/rules/rules.libsonnet | 8 |
2 files changed, 11 insertions, 6 deletions
diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index 160cfd8..7499493 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet | |||
@@ -23,7 +23,8 @@ local g = import 'grafana-builder/grafana.libsonnet'; | |||
23 | { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, | 23 | { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, |
24 | ) | 24 | ) |
25 | .addPanel( | 25 | .addPanel( |
26 | // TODO: Is this a useful panel? | 26 | // TODO: Is this a useful panel? At least there should be some explanation of how load |
27 | // average relates to the "CPU saturation" in the title. | ||
27 | g.panel('CPU Saturation (load1 per CPU)') + | 28 | g.panel('CPU Saturation (load1 per CPU)') + |
28 | g.queryPanel(||| | 29 | g.queryPanel(||| |
29 | ( | 30 | ( |
@@ -58,6 +59,8 @@ local g = import 'grafana-builder/grafana.libsonnet'; | |||
58 | g.panel('Disk IO Utilisation') + | 59 | g.panel('Disk IO Utilisation') + |
59 | // Full utilisation would be all disks on each node spending an average of | 60 | // Full utilisation would be all disks on each node spending an average of |
60 | // 1 second per second doing I/O, normalize by metric cardinality for stacked charts. | 61 | // 1 second per second doing I/O, normalize by metric cardinality for stacked charts. |
62 | // TODO: Does the partition by device make sense? Using the most utilized device per | ||
63 | // instance might make more sense. | ||
61 | g.queryPanel(||| | 64 | g.queryPanel(||| |
62 | ( | 65 | ( |
63 | instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s} | 66 | instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s} |
@@ -113,7 +116,7 @@ local g = import 'grafana-builder/grafana.libsonnet'; | |||
113 | .addRow( | 116 | .addRow( |
114 | g.row('Storage') | 117 | g.row('Storage') |
115 | .addPanel( | 118 | .addPanel( |
116 | g.panel('Disk Capacity') + | 119 | g.panel('Disk Space Utilisation') + |
117 | g.queryPanel(||| | 120 | g.queryPanel(||| |
118 | ( | 121 | ( |
119 | sum without (device) ( | 122 | sum without (device) ( |
@@ -145,6 +148,8 @@ local g = import 'grafana-builder/grafana.libsonnet'; | |||
145 | { yaxes: g.yaxes('percentunit') }, | 148 | { yaxes: g.yaxes('percentunit') }, |
146 | ) | 149 | ) |
147 | .addPanel( | 150 | .addPanel( |
151 | // TODO: Is this a useful panel? At least there should be some explanation of how load | ||
152 | // average relates to the "CPU saturation" in the title. | ||
148 | g.panel('CPU Saturation (Load1)') + | 153 | g.panel('CPU Saturation (Load1)') + |
149 | g.queryPanel('instance:node_cpu_saturation_load1:{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Saturation') + | 154 | g.queryPanel('instance:node_cpu_saturation_load1:{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Saturation') + |
150 | { yaxes: g.yaxes('percentunit') }, | 155 | { yaxes: g.yaxes('percentunit') }, |
diff --git a/docs/node-mixin/rules/rules.libsonnet b/docs/node-mixin/rules/rules.libsonnet index b5efc6a..8bb3703 100644 --- a/docs/node-mixin/rules/rules.libsonnet +++ b/docs/node-mixin/rules/rules.libsonnet | |||
@@ -28,7 +28,7 @@ | |||
28 | // This is CPU saturation: 1min avg run queue length / number of CPUs. | 28 | // This is CPU saturation: 1min avg run queue length / number of CPUs. |
29 | // Can go over 1. | 29 | // Can go over 1. |
30 | // TODO: There are situations where a run queue >1/core is just normal and fine. | 30 | // TODO: There are situations where a run queue >1/core is just normal and fine. |
31 | // We need to clarify how to lead this metric and if its usage is helpful at all. | 31 | // We need to clarify how to read this metric and if its usage is helpful at all. |
32 | record: 'instance:node_load1_per_cpu:ratio', | 32 | record: 'instance:node_load1_per_cpu:ratio', |
33 | expr: ||| | 33 | expr: ||| |
34 | ( | 34 | ( |
@@ -39,7 +39,7 @@ | |||
39 | ||| % $._config, | 39 | ||| % $._config, |
40 | }, | 40 | }, |
41 | { | 41 | { |
42 | // Memory utilisation per node, normalized by per-node memory | 42 | // Memory utilisation (ratio of used memory per instance). |
43 | record: 'instance:node_memory_utilisation:ratio', | 43 | record: 'instance:node_memory_utilisation:ratio', |
44 | expr: ||| | 44 | expr: ||| |
45 | 1 - ( | 45 | 1 - ( |
@@ -60,14 +60,14 @@ | |||
60 | ||| % $._config, | 60 | ||| % $._config, |
61 | }, | 61 | }, |
62 | { | 62 | { |
63 | // Disk utilisation (seconds spent, 1 second rate) | 63 | // Disk utilisation (seconds spent, 1 second rate). |
64 | record: 'instance_device:node_disk_io_time_seconds:rate1m', | 64 | record: 'instance_device:node_disk_io_time_seconds:rate1m', |
65 | expr: ||| | 65 | expr: ||| |
66 | rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[1m]) | 66 | rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[1m]) |
67 | ||| % $._config, | 67 | ||| % $._config, |
68 | }, | 68 | }, |
69 | { | 69 | { |
70 | // Disk saturation (weighted seconds spent, 1 second rate) | 70 | // Disk saturation (weighted seconds spent, 1 second rate). |
71 | record: 'instance_device:node_disk_io_time_weighted_seconds:rate1m', | 71 | record: 'instance_device:node_disk_io_time_weighted_seconds:rate1m', |
72 | expr: ||| | 72 | expr: ||| |
73 | rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[1m]) | 73 | rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[1m]) |