diff options
author | Henk <hbokhoven@gmail.com> | 2019-04-08 15:50:29 +0200 |
---|---|---|
committer | Ben Kochie <superq@gmail.com> | 2019-04-08 15:50:29 +0200 |
commit | fbe390709f5370cd9092233239208fb51f46d15f (patch) | |
tree | d750c267902bc156ba4719595f2d718643b2aaf3 /text_collector_examples | |
parent | b8b0195d6d76358dfb592cf7e49781a443bf1849 (diff) | |
download | prometheus_node_collector-fbe390709f5370cd9092233239208fb51f46d15f.tar.bz2 prometheus_node_collector-fbe390709f5370cd9092233239208fb51f46d15f.tar.xz prometheus_node_collector-fbe390709f5370cd9092233239208fb51f46d15f.zip |
Add nvme_metrics.sh text collector example (#1309)
* Add nvme_metrics.sh text collector example
Signed-off-by: Henk <henk@wearespindle.com>
Diffstat (limited to 'text_collector_examples')
-rw-r--r-- | text_collector_examples/nvme_metrics.sh | 97 |
1 files changed, 97 insertions, 0 deletions
diff --git a/text_collector_examples/nvme_metrics.sh b/text_collector_examples/nvme_metrics.sh new file mode 100644 index 0000000..5cc23cf --- /dev/null +++ b/text_collector_examples/nvme_metrics.sh | |||
@@ -0,0 +1,97 @@ | |||
#!/usr/bin/env bash
set -eu

# Dependencies: nvme-cli, jq (packages)
# Based on code from
# - https://github.com/prometheus/node_exporter/blob/master/text_collector_examples/smartmon.sh
# - https://github.com/prometheus/node_exporter/blob/master/text_collector_examples/mellanox_hca_temp
# - https://github.com/vorlon/check_nvme/blob/master/check_nvme.sh
#
# Author: Henk <henk@wearespindle.com>

# Refuse to run unless the effective user is root; the diagnostic goes to
# stderr and the script exits with status 1.
if (( EUID != 0 )); then
  printf '%s\n' "${0##*/}: Please run as root!" >&2
  exit 1
fi
17 | |||
# Check if programs are installed.
# Both tools are required: nvme-cli produces the SMART log as JSON and jq
# extracts the individual values from it. Abort early with a clear message
# instead of emitting malformed samples later on.
for program in nvme jq; do
  if ! command -v "${program}" >/dev/null 2>&1; then
    echo "${0##*/}: ${program} is not installed. Aborting." >&2
    exit 1
  fi
done
23 | |||
# awk program used by format_output. Input records are split on "{", so $1 is
# the bare metric name of a 'name{labels} value' sample line. Whenever the
# name differs from the previous record a HELP/TYPE header is printed first;
# names ending in "_total" are typed as counters, everything else as gauges.
# Every sample line is then printed with the "nvme_" prefix.
output_format_awk="$(
  cat <<'OUTPUTAWK'
BEGIN { v = "" }
v != $1 {
  print "# HELP nvme_" $1 " SMART metric " $1;
  if ($1 ~ /_total$/)
    print "# TYPE nvme_" $1 " counter";
  else
    print "# TYPE nvme_" $1 " gauge";
  v = $1
}
{print "nvme_" $0}
OUTPUTAWK
)"

#######################################
# Turn raw sample lines into grouped exposition output.
# Globals:   output_format_awk (read)
# Inputs:    'name{labels} value' lines on stdin, in any order
# Outputs:   the samples sorted, prefixed with "nvme_", with one HELP/TYPE
#            header ahead of each run of identical metric names
#######################################
format_output() {
  # Sort byte-wise: locale-aware collation may ignore punctuation such as "_"
  # and "{", which can interleave series of different metrics and make the
  # awk program emit the same HELP/TYPE header more than once.
  LC_ALL=C sort | awk -F'{' "${output_format_awk}"
}
42 | |||
# Report the installed nvme-cli version as an info-style sample with value 1.
# The version is the third field of the `nvme version` line whose first field
# is "nvme".
nvme_version="$(nvme version | awk '$1 == "nvme" {print $3}')"
printf 'nvmecli{version="%s"} 1\n' "${nvme_version}" | format_output

# Collect the device nodes: first column of every `nvme list` row that starts
# with /dev.
device_list="$(nvme list | awk '/^\/dev/{print $1}')"
49 | |||
#######################################
# Convert one `nvme smart-log -o json` document into metric sample lines.
# All values are extracted by a single jq invocation.
# Arguments: $1 - value used for the "device" label
# Inputs:    SMART log JSON object on stdin
# Outputs:   one 'name{device="..."} value' line per field that is present;
#            fields that are missing or null are skipped so that no sample
#            with an empty or "null" value is ever emitted
# Returns:   exit status of jq
#######################################
smart_log_to_metrics() {
  local disk=$1
  jq -r --arg disk "${disk}" '
    # Emit a sample line for each non-null result of value.
    def metric(name; value):
      value | values | "\(name){device=\"\($disk)\"} \(.)";

    # The temperature value in JSON is in Kelvin, we want Celsius.
    metric("temperature_celsius"; .temperature | values | . - 273),
    # Percentages are exported as ratios.
    metric("available_spare_ratio"; .avail_spare | values | . / 100),
    metric("available_spare_threshold_ratio"; .spare_thresh | values | . / 100),
    metric("percentage_used_ratio"; .percent_used | values | . / 100),
    metric("critical_warning_total"; .critical_warning),
    metric("media_errors_total"; .media_errors),
    metric("num_err_log_entries_total"; .num_err_log_entries),
    metric("power_cycles_total"; .power_cycles),
    metric("power_on_hours_total"; .power_on_hours),
    # NVMe reports the controller busy time in minutes; the metric is seconds.
    metric("controller_busy_time_seconds"; .controller_busy_time | values | . * 60),
    metric("data_units_written_total"; .data_units_written),
    metric("data_units_read_total"; .data_units_read),
    metric("host_read_commands_total"; .host_read_commands),
    metric("host_write_commands_total"; .host_write_commands)
  '
}

# Loop through the NVMe devices.
# device_list is a whitespace-separated list of device nodes, so the unquoted
# expansion (word splitting) is intentional here.
for device in ${device_list}; do
  # A device whose SMART log cannot be read is skipped with a diagnostic
  # instead of silently dropping the metrics of all remaining devices.
  if ! json_check="$(nvme smart-log -o json "${device}")"; then
    echo "${0##*/}: could not read SMART log of ${device}, skipping." >&2
    continue
  fi

  # Use the full device node name (e.g. nvme0n1, nvme10n1) as the label. A
  # fixed-width cut would truncate names with multi-digit controller numbers
  # and map several namespaces of one controller onto the same label.
  disk="${device##*/}"

  printf '%s\n' "${json_check}" | smart_log_to_metrics "${disk}"
done | format_output