aboutsummaryrefslogtreecommitdiff
path: root/text_collector_examples
diff options
context:
space:
mode:
author Henk <hbokhoven@gmail.com> 2019-04-08 15:50:29 +0200
committer Ben Kochie <superq@gmail.com> 2019-04-08 15:50:29 +0200
commit fbe390709f5370cd9092233239208fb51f46d15f (patch)
tree d750c267902bc156ba4719595f2d718643b2aaf3 /text_collector_examples
parent b8b0195d6d76358dfb592cf7e49781a443bf1849 (diff)
download prometheus_node_collector-fbe390709f5370cd9092233239208fb51f46d15f.tar.bz2
prometheus_node_collector-fbe390709f5370cd9092233239208fb51f46d15f.tar.xz
prometheus_node_collector-fbe390709f5370cd9092233239208fb51f46d15f.zip
Add nvme_metrics.sh text collector example (#1309)
* Add nvme_metrics.sh text collector example
Signed-off-by: Henk <henk@wearespindle.com>
Diffstat (limited to 'text_collector_examples')
-rw-r--r-- text_collector_examples/nvme_metrics.sh 97
1 files changed, 97 insertions, 0 deletions
diff --git a/text_collector_examples/nvme_metrics.sh b/text_collector_examples/nvme_metrics.sh
new file mode 100644
index 0000000..5cc23cf
--- /dev/null
+++ b/text_collector_examples/nvme_metrics.sh
@@ -0,0 +1,97 @@
#!/usr/bin/env bash
set -eu

# Prints values from `nvme smart-log` for every NVMe device as nvme_* metric
# lines, each metric preceded by its "# HELP" and "# TYPE" lines, on stdout.
#
# Dependencies: nvme-cli, jq (packages)
# Based on code from
# - https://github.com/prometheus/node_exporter/blob/master/text_collector_examples/smartmon.sh
# - https://github.com/prometheus/node_exporter/blob/master/text_collector_examples/mellanox_hca_temp
# - https://github.com/vorlon/check_nvme/blob/master/check_nvme.sh
#
# Author: Henk <henk@wearespindle.com>

# Check if we are root; refuse to continue otherwise.
if [ "$EUID" -ne 0 ]; then
  echo "${0##*/}: Please run as root!" >&2
  exit 1
fi

# Check if programs are installed. Both tools are verified up front so that a
# missing jq is reported clearly here instead of failing later inside the
# per-device loop with a bare "command not found".
for required_program in nvme jq; do
  if ! command -v "${required_program}" >/dev/null 2>&1; then
    echo "${0##*/}: ${required_program} is not installed. Aborting." >&2
    exit 1
  fi
done
23
# awk program used by format_output. Input lines have the form
#   <name>{<labels>} <value>
# and are split on "{" (see -F below), so $1 is the bare metric name.
# Whenever the name differs from the previous line's name, a "# HELP" and a
# "# TYPE" line are printed first; names ending in "_total" are typed as
# counter, everything else as gauge. Every input line is then printed with
# the "nvme_" prefix added.
output_format_awk="$(
  cat <<'OUTPUTAWK'
BEGIN { v = "" }
v != $1 {
  print "# HELP nvme_" $1 " SMART metric " $1;
  if ($1 ~ /_total$/)
    print "# TYPE nvme_" $1 " counter";
  else
    print "# TYPE nvme_" $1 " gauge";
  v = $1
}
{print "nvme_" $0}
OUTPUTAWK
)"

#######################################
# Turn raw sample lines into grouped, prefixed metric output.
# Globals:   output_format_awk (read)
# Arguments: none
# Inputs:    sample lines "<name>{<labels>} <value>" on stdin
# Outputs:   sorted "nvme_"-prefixed samples with HELP/TYPE headers on stdout
#######################################
format_output() {
  # The awk program only compares each line's name with the previous one, so
  # all samples of a metric must be adjacent. A byte-wise sort guarantees
  # that; locale-aware collation may ignore "_" and "{" and interleave names,
  # which would repeat the HELP/TYPE headers.
  LC_ALL=C sort | awk -F'{' "${output_format_awk}"
}
42
# Report the nvme-cli version: take the third field of the `nvme version`
# line whose first field is "nvme" and expose it as a label on a sample
# whose value is always 1.
nvme_version="$(nvme version | awk '$1 == "nvme" {print $3}')"
version_sample="nvmecli{version=\"${nvme_version}\"} 1"
format_output <<<"${version_sample}"
46
# Get devices: first column of every `nvme list` row that starts with /dev
device_list="$(nvme list | awk '/^\/dev/{print $1}')"

#######################################
# Print one sample line for the device currently being processed.
# Globals:   json_check (read) - smart-log JSON of the current device
#            disk (read)       - value used for the "device" label
# Arguments: $1 - metric name (format_output adds the nvme_ prefix later)
#            $2 - jq filter that yields the sample value from json_check
# Outputs:   <name>{device="<disk>"} <value> on stdout
#######################################
emit_nvme_metric() {
  local metric_name="$1"
  local jq_filter="$2"
  local metric_value
  # Assigned on its own line (not inside the printf arguments) so that a
  # failing jq still aborts under `set -e` before a sample line is printed.
  metric_value="$(jq "${jq_filter}" <<<"${json_check}")"
  printf '%s{device="%s"} %s\n' "${metric_name}" "${disk}" "${metric_value}"
}

# Loop through the NVMe devices.
# ${device_list} is left unquoted on purpose: it holds one device path per
# line and word splitting turns that into one loop iteration per device.
for device in ${device_list}; do
  json_check="$(nvme smart-log -o json "${device}")"
  # Use the full device node name as the label. A fixed-width slice of the
  # path would shorten /dev/nvme0n1 to "nvme0" and /dev/nvme10n1 to "nvme1",
  # making different namespaces/controllers share one label value.
  disk="${device##*/}"

  # The temperature value in JSON is in Kelvin, we want Celsius
  # NOTE(review): "celcius" is a misspelling of "celsius"; the emitted name is
  # left unchanged because renaming a metric changes what consumers must query.
  emit_nvme_metric "temperature_celcius" '.temperature - 273'

  # Percent values are divided by 100 to report them as ratios.
  emit_nvme_metric "available_spare_ratio" '.avail_spare / 100'
  emit_nvme_metric "available_spare_threshold_ratio" '.spare_thresh / 100'
  emit_nvme_metric "percentage_used_ratio" '.percent_used / 100'

  # The remaining fields are passed through unchanged.
  emit_nvme_metric "critical_warning_total" '.critical_warning'
  emit_nvme_metric "media_errors_total" '.media_errors'
  emit_nvme_metric "num_err_log_entries_total" '.num_err_log_entries'
  emit_nvme_metric "power_cycles_total" '.power_cycles'
  emit_nvme_metric "power_on_hours_total" '.power_on_hours'
  emit_nvme_metric "controller_busy_time_seconds" '.controller_busy_time'
  emit_nvme_metric "data_units_written_total" '.data_units_written'
  emit_nvme_metric "data_units_read_total" '.data_units_read'
  emit_nvme_metric "host_read_commands_total" '.host_read_commands'
  emit_nvme_metric "host_write_commands_total" '.host_write_commands'
done | format_output