From 36dc7451c95dbac064fff72a2929d1c6dc82b187 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Mon, 22 Jul 2019 14:06:27 +0200 Subject: Improvement of comments and panel titles Signed-off-by: beorn7 --- docs/node-mixin/dashboards/use.libsonnet | 9 +++++++-- docs/node-mixin/rules/rules.libsonnet | 8 ++++---- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'docs') diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index 160cfd8..7499493 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet @@ -23,7 +23,8 @@ local g = import 'grafana-builder/grafana.libsonnet'; { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, ) .addPanel( - // TODO: Is this a useful panel? + // TODO: Is this a useful panel? At least there should be some explanation of how load + // average relates to the "CPU saturation" in the title. g.panel('CPU Saturation (load1 per CPU)') + g.queryPanel(||| ( @@ -58,6 +59,8 @@ local g = import 'grafana-builder/grafana.libsonnet'; g.panel('Disk IO Utilisation') + // Full utilisation would be all disks on each node spending an average of // 1 second per second doing I/O, normalize by metric cardinality for stacked charts. + // TODO: Does the partition by device make sense? Using the most utilised device per + // instance might make more sense. g.queryPanel(||| ( instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s} @@ -113,7 +116,7 @@ local g = import 'grafana-builder/grafana.libsonnet'; .addRow( g.row('Storage') .addPanel( - g.panel('Disk Capacity') + + g.panel('Disk Space Utilisation') + g.queryPanel(||| ( sum without (device) ( @@ -145,6 +148,8 @@ local g = import 'grafana-builder/grafana.libsonnet'; { yaxes: g.yaxes('percentunit') }, ) .addPanel( + // TODO: Is this a useful panel? At least there should be some explanation of how load + // average relates to the "CPU saturation" in the title.
g.panel('CPU Saturation (Load1)') + g.queryPanel('instance:node_cpu_saturation_load1:{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Saturation') + { yaxes: g.yaxes('percentunit') }, diff --git a/docs/node-mixin/rules/rules.libsonnet b/docs/node-mixin/rules/rules.libsonnet index b5efc6a..8bb3703 100644 --- a/docs/node-mixin/rules/rules.libsonnet +++ b/docs/node-mixin/rules/rules.libsonnet @@ -28,7 +28,7 @@ // This is CPU saturation: 1min avg run queue length / number of CPUs. // Can go over 1. // TODO: There are situation where a run queue >1/core is just normal and fine. - // We need to clarify how to lead this metric and if its usage is helpful at all. + // We need to clarify how to read this metric and whether its usage is helpful at all. record: 'instance:node_load1_per_cpu:ratio', expr: ||| ( @@ -39,7 +39,7 @@ ||| % $._config, }, { - // Memory utilisation per node, normalized by per-node memory + // Memory utilisation (ratio of used memory per instance). record: 'instance:node_memory_utilisation:ratio', expr: ||| 1 - ( @@ -60,14 +60,14 @@ ||| % $._config, }, { - // Disk utilisation (seconds spent, 1 second rate) + // Disk utilisation (seconds spent per second, 1m rate). record: 'instance_device:node_disk_io_time_seconds:rate1m', expr: ||| rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[1m]) ||| % $._config, }, { - // Disk saturation (weighted seconds spent, 1 second rate) + // Disk saturation (weighted seconds spent per second, 1m rate). record: 'instance_device:node_disk_io_time_weighted_seconds:rate1m', expr: ||| rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[1m]) -- cgit v1.2.3