aboutsummaryrefslogtreecommitdiff
path: root/text_collector_examples
diff options
context:
space:
mode:
authorJohannes 'fish' Ziemke <github@freigeist.org>2019-08-03 12:14:51 +0200
committerBen Kochie <superq@gmail.com>2019-08-03 12:14:51 +0200
commitfc73586c971225037aa09b5462031b9694278c74 (patch)
tree153f18c687e0c89426de3814294dcac261484c52 /text_collector_examples
parent0b710bb0c95c32402477c7df0ad74c2e4f13c4d9 (diff)
downloadprometheus_node_collector-fc73586c971225037aa09b5462031b9694278c74.tar.bz2
prometheus_node_collector-fc73586c971225037aa09b5462031b9694278c74.tar.xz
prometheus_node_collector-fc73586c971225037aa09b5462031b9694278c74.zip
Remove text_collector_examples/ (#1441)
* Remove text_collector_examples/ These have been moved to https://github.com/prometheus-community/node-exporter-textfile-collector-scripts This closes #1077 Signed-off-by: Johannes 'fish' Ziemke <github@freigeist.org>
Diffstat (limited to 'text_collector_examples')
-rw-r--r--text_collector_examples/README.md16
-rwxr-xr-xtext_collector_examples/apt.sh32
-rwxr-xr-xtext_collector_examples/btrfs_stats.py112
-rwxr-xr-xtext_collector_examples/deleted_libraries.py70
-rwxr-xr-xtext_collector_examples/directory-size.sh15
-rwxr-xr-xtext_collector_examples/inotify-instances141
-rwxr-xr-xtext_collector_examples/ipmitool89
-rwxr-xr-xtext_collector_examples/md_info.sh56
-rwxr-xr-xtext_collector_examples/md_info_detail.sh87
-rwxr-xr-xtext_collector_examples/mellanox_hca_temp59
-rwxr-xr-xtext_collector_examples/multipathd_info9
-rwxr-xr-xtext_collector_examples/ntpd_metrics.py122
-rwxr-xr-xtext_collector_examples/nvme_metrics.sh97
-rwxr-xr-xtext_collector_examples/pacman.sh33
-rwxr-xr-xtext_collector_examples/smartmon.py378
-rwxr-xr-xtext_collector_examples/smartmon.sh194
-rwxr-xr-xtext_collector_examples/storcli.py242
-rwxr-xr-xtext_collector_examples/yum.sh18
18 files changed, 2 insertions, 1768 deletions
diff --git a/text_collector_examples/README.md b/text_collector_examples/README.md
index a26592f..3794261 100644
--- a/text_collector_examples/README.md
+++ b/text_collector_examples/README.md
@@ -1,16 +1,4 @@
1# Text collector example scripts 1# Text collector example scripts
2 2
3These scripts are examples to be used with the Node Exporter Textfile 3The scripts have been moved to
4Collector. 4https://github.com/prometheus-community/node-exporter-textfile-collector-scripts
5
6To use these scripts, we recommend using a `sponge` to atomically write the output.
7
8 <collector_script> | sponge <output_file>
9
10Sponge comes from [moreutils](https://joeyh.name/code/moreutils/)
11* [brew install moreutils](http://brewformulas.org/Moreutil)
12* [apt install moreutils](https://packages.debian.org/search?keywords=moreutils)
13* [pkg install moreutils](https://www.freshports.org/sysutils/moreutils/)
14
15For more information see:
16https://github.com/prometheus/node_exporter#textfile-collector
diff --git a/text_collector_examples/apt.sh b/text_collector_examples/apt.sh
deleted file mode 100755
index 171bb0a..0000000
--- a/text_collector_examples/apt.sh
+++ /dev/null
@@ -1,32 +0,0 @@
1#!/bin/bash
2#
3# Description: Expose metrics from apt updates.
4#
5# Author: Ben Kochie <superq@gmail.com>
6
7upgrades="$(/usr/bin/apt-get --just-print upgrade \
8 | /usr/bin/awk -F'[()]' \
9 '/^Inst/ { sub("^[^ ]+ ", "", $2); gsub(" ","",$2);
10 sub("\\[", " ", $2); sub("\\]", "", $2); print $2 }' \
11 | /usr/bin/sort \
12 | /usr/bin/uniq -c \
13 | awk '{ gsub(/\\\\/, "\\\\", $2); gsub(/\"/, "\\\"", $2);
14 gsub(/\[/, "", $3); gsub(/\]/, "", $3);
15 print "apt_upgrades_pending{origin=\"" $2 "\",arch=\"" $3 "\"} " $1}'
16)"
17
18echo '# HELP apt_upgrades_pending Apt package pending updates by origin.'
19echo '# TYPE apt_upgrades_pending gauge'
20if [[ -n "${upgrades}" ]] ; then
21 echo "${upgrades}"
22else
23 echo 'apt_upgrades_pending{origin="",arch=""} 0'
24fi
25
26echo '# HELP node_reboot_required Node reboot is required for software updates.'
27echo '# TYPE node_reboot_required gauge'
28if [[ -f '/run/reboot-required' ]] ; then
29 echo 'node_reboot_required 1'
30else
31 echo 'node_reboot_required 0'
32fi
diff --git a/text_collector_examples/btrfs_stats.py b/text_collector_examples/btrfs_stats.py
deleted file mode 100755
index 68e89a8..0000000
--- a/text_collector_examples/btrfs_stats.py
+++ /dev/null
@@ -1,112 +0,0 @@
1#!/usr/bin/env python3
2
3# Collect per-device btrfs filesystem errors.
4# Designed to work on Debian and Centos 6 (with python2.6).
5
6import collections
7import glob
8import os
9import re
10import subprocess
11
12def get_btrfs_mount_points():
13 """List all btrfs mount points.
14
15 Yields:
16 (string) filesystem mount points.
17 """
18 with open("/proc/mounts") as f:
19 for line in f:
20 parts = line.split()
21 if parts[2] == "btrfs":
22 yield parts[1]
23
24def get_btrfs_errors(mountpoint):
25 """Get per-device errors for a btrfs mount point.
26
27 Args:
28 mountpoint: (string) path to a mount point.
29
30 Yields:
31 (device, error_type, error_count) tuples, where:
32 device: (string) path to block device.
33 error_type: (string) type of btrfs error.
34 error_count: (int) number of btrfs errors of a given type.
35 """
36 p = subprocess.Popen(["btrfs", "device", "stats", mountpoint],
37 stdout=subprocess.PIPE)
38 (stdout, stderr) = p.communicate()
39 if p.returncode != 0:
40 raise RuntimeError("btrfs returned exit code %d" % p.returncode)
41 for line in stdout.splitlines():
42 if line == '':
43 continue
44 # Sample line:
45 # [/dev/vdb1].flush_io_errs 0
46 m = re.search(r"^\[([^\]]+)\]\.(\S+)\s+(\d+)$", line.decode("utf-8"))
47 if not m:
48 raise RuntimeError("unexpected output from btrfs: '%s'" % line)
49 yield m.group(1), m.group(2), int(m.group(3))
50
51def btrfs_error_metrics():
52 """Collect btrfs error metrics.
53
54 Returns:
55 a list of strings to be exposed as Prometheus metrics.
56 """
57 metric = "node_btrfs_errors_total"
58 contents = [
59 "# TYPE %s counter" % metric,
60 "# HELP %s number of btrfs errors" % metric,
61 ]
62 errors_by_device = collections.defaultdict(dict)
63 for mountpoint in get_btrfs_mount_points():
64 for device, error_type, error_count in get_btrfs_errors(mountpoint):
65 contents.append(
66 '%s{mountpoint="%s",device="%s",type="%s"} %d' %
67 (metric, mountpoint, device, error_type, error_count))
68
69 if len(contents) > 2:
70 # return metrics if there are actual btrfs filesystems found
71 # (i.e. `contents` contains more than just TYPE and HELP).
72 return contents
73
74def btrfs_allocation_metrics():
75 """Collect btrfs allocation metrics.
76
77 Returns:
78 a list of strings to be exposed as Prometheus metrics.
79 """
80 prefix = 'node_btrfs_allocation'
81 metric_to_filename = {
82 'size_bytes': 'total_bytes',
83 'used_bytes': 'bytes_used',
84 'reserved_bytes': 'bytes_reserved',
85 'pinned_bytes': 'bytes_pinned',
86 'disk_size_bytes': 'disk_total',
87 'disk_used_bytes': 'disk_used',
88 }
89 contents = []
90 for m, f in metric_to_filename.items():
91 contents += [
92 "# TYPE %s_%s gauge" % (prefix, m),
93 "# HELP %s_%s btrfs allocation data (%s)" % (prefix, m, f),
94 ]
95
96 for alloc in glob.glob("/sys/fs/btrfs/*/allocation"):
97 fs = alloc.split('/')[4]
98 for type_ in ('data', 'metadata', 'system'):
99 for m, f in metric_to_filename.items():
100 filename = os.path.join(alloc, type_, f)
101 with open(filename) as f:
102 value = int(f.read().strip())
103 contents.append('%s_%s{fs="%s",type="%s"} %d' % (
104 prefix, m, fs, type_, value))
105 if len(contents) > 2*len(metric_to_filename):
106 return contents
107
108if __name__ == "__main__":
109 contents = ((btrfs_error_metrics() or []) +
110 (btrfs_allocation_metrics() or []))
111
112 print("\n".join(contents))
diff --git a/text_collector_examples/deleted_libraries.py b/text_collector_examples/deleted_libraries.py
deleted file mode 100755
index 1354d80..0000000
--- a/text_collector_examples/deleted_libraries.py
+++ /dev/null
@@ -1,70 +0,0 @@
1#!/usr/bin/env python3
2"""
3Script to count the number of deleted libraries that are linked by running
4processes and expose a summary as Prometheus metrics.
5
6The aim is to discover processes that are still using libraries that have since
7been updated, perhaps due security vulnerabilities.
8"""
9
10import errno
11import glob
12import os
13import sys
14
15
16def main():
17 processes_linking_deleted_libraries = {}
18
19 for path in glob.glob('/proc/*/maps'):
20 try:
21 with open(path, 'rb') as file:
22 for line in file:
23 part = line.decode().strip().split()
24
25 if len(part) == 7:
26 library = part[5]
27 comment = part[6]
28
29 if '/lib/' in library and '(deleted)' in comment:
30 if path not in processes_linking_deleted_libraries:
31 processes_linking_deleted_libraries[path] = {}
32
33 if library in processes_linking_deleted_libraries[path]:
34 processes_linking_deleted_libraries[path][library] += 1
35 else:
36 processes_linking_deleted_libraries[path][library] = 1
37 except EnvironmentError as e:
38 # Ignore non-existent files, since the files may have changed since
39 # we globbed.
40 if e.errno != errno.ENOENT:
41 sys.exit('Failed to open file: {0}'.format(path))
42
43 num_processes_per_library = {}
44
45 for process, library_count in processes_linking_deleted_libraries.items():
46 libraries_seen = set()
47 for library, count in library_count.items():
48 if library in libraries_seen:
49 continue
50
51 libraries_seen.add(library)
52 if library in num_processes_per_library:
53 num_processes_per_library[library] += 1
54 else:
55 num_processes_per_library[library] = 1
56
57 metric_name = 'node_processes_linking_deleted_libraries'
58 description = 'Count of running processes that link a deleted library'
59 print('# HELP {0} {1}'.format(metric_name, description))
60 print('# TYPE {0} gauge'.format(metric_name))
61
62 for library, count in num_processes_per_library.items():
63 dir_path, basename = os.path.split(library)
64 basename = basename.replace('"', '\\"')
65 dir_path = dir_path.replace('"', '\\"')
66 print('{0}{{library_path="{1}", library_name="{2}"}} {3}'.format(metric_name, dir_path, basename, count))
67
68
69if __name__ == "__main__":
70 main()
diff --git a/text_collector_examples/directory-size.sh b/text_collector_examples/directory-size.sh
deleted file mode 100755
index 4aab71d..0000000
--- a/text_collector_examples/directory-size.sh
+++ /dev/null
@@ -1,15 +0,0 @@
1#!/bin/sh
2#
3# Expose directory usage metrics, passed as an argument.
4#
5# Usage: add this to crontab:
6#
7# */5 * * * * prometheus directory-size.sh /var/lib/prometheus | sponge /var/lib/node_exporter/directory_size.prom
8#
9# sed pattern taken from https://www.robustperception.io/monitoring-directory-sizes-with-the-textfile-collector/
10#
11# Author: Antoine Beaupré <anarcat@debian.org>
12echo "# HELP node_directory_size_bytes Disk space used by some directories"
13echo "# TYPE node_directory_size_bytes gauge"
14du --block-size=1 --summarize "$@" \
15 | sed -ne 's/\\/\\\\/;s/"/\\"/g;s/^\([0-9]\+\)\t\(.*\)$/node_directory_size_bytes{directory="\2"} \1/p'
diff --git a/text_collector_examples/inotify-instances b/text_collector_examples/inotify-instances
deleted file mode 100755
index ada74d4..0000000
--- a/text_collector_examples/inotify-instances
+++ /dev/null
@@ -1,141 +0,0 @@
1#!/usr/bin/env python3
2
3"""
4Expose Linux inotify(7) instance resource consumption.
5
6Operational properties:
7
8 - This script may be invoked as an unprivileged user; in this case, metrics
9 will only be exposed for processes owned by that unprivileged user.
10
11 - No metrics will be exposed for processes that do not hold any inotify fds.
12
13Requires Python 3.5 or later.
14"""
15
16import collections
17import os
18import sys
19
20
21class Error(Exception):
22 pass
23
24
25class _PIDGoneError(Error):
26 pass
27
28
29_Process = collections.namedtuple(
30 "Process", ["pid", "uid", "command", "inotify_instances"])
31
32
33def _read_bytes(name):
34 with open(name, mode='rb') as f:
35 return f.read()
36
37
38def _pids():
39 for n in os.listdir("/proc"):
40 if not n.isdigit():
41 continue
42 yield int(n)
43
44
45def _pid_uid(pid):
46 try:
47 s = os.stat("/proc/{}".format(pid))
48 except FileNotFoundError:
49 raise _PIDGoneError()
50 return s.st_uid
51
52
53def _pid_command(pid):
54 # Avoid GNU ps(1) for it truncates comm.
55 # https://bugs.launchpad.net/ubuntu/+source/procps/+bug/295876/comments/3
56 try:
57 cmdline = _read_bytes("/proc/{}/cmdline".format(pid))
58 except FileNotFoundError:
59 raise _PIDGoneError()
60
61 if not len(cmdline):
62 return "<zombie>"
63
64 try:
65 prog = cmdline[0:cmdline.index(0x00)]
66 except ValueError:
67 prog = cmdline
68 return os.path.basename(prog).decode(encoding="ascii",
69 errors="surrogateescape")
70
71
72def _pid_inotify_instances(pid):
73 instances = 0
74 try:
75 for fd in os.listdir("/proc/{}/fd".format(pid)):
76 try:
77 target = os.readlink("/proc/{}/fd/{}".format(pid, fd))
78 except FileNotFoundError:
79 continue
80 if target == "anon_inode:inotify":
81 instances += 1
82 except FileNotFoundError:
83 raise _PIDGoneError()
84 return instances
85
86
87def _get_processes():
88 for p in _pids():
89 try:
90 yield _Process(p, _pid_uid(p), _pid_command(p),
91 _pid_inotify_instances(p))
92 except (PermissionError, _PIDGoneError):
93 continue
94
95
96def _get_processes_nontrivial():
97 return (p for p in _get_processes() if p.inotify_instances > 0)
98
99
100def _format_gauge_metric(metric_name, metric_help, samples,
101 value_func, tags_func=None, stream=sys.stdout):
102
103 def _println(*args, **kwargs):
104 if "file" not in kwargs:
105 kwargs["file"] = stream
106 print(*args, **kwargs)
107
108 def _print(*args, **kwargs):
109 if "end" not in kwargs:
110 kwargs["end"] = ""
111 _println(*args, **kwargs)
112
113 _println("# HELP {} {}".format(metric_name, metric_help))
114 _println("# TYPE {} gauge".format(metric_name))
115
116 for s in samples:
117 value = value_func(s)
118 tags = None
119 if tags_func:
120 tags = tags_func(s)
121
122 _print(metric_name)
123 if tags:
124 _print("{")
125 _print(",".join(["{}=\"{}\"".format(k, v) for k, v in tags]))
126 _print("}")
127 _print(" ")
128 _println(value)
129
130
131def main(args_unused=None):
132 _format_gauge_metric(
133 "inotify_instances",
134 "Total number of inotify instances held open by a process.",
135 _get_processes_nontrivial(),
136 lambda s: s.inotify_instances,
137 lambda s: [("pid", s.pid), ("uid", s.uid), ("command", s.command)])
138
139
140if __name__ == "__main__":
141 sys.exit(main(sys.argv))
diff --git a/text_collector_examples/ipmitool b/text_collector_examples/ipmitool
deleted file mode 100755
index e373b95..0000000
--- a/text_collector_examples/ipmitool
+++ /dev/null
@@ -1,89 +0,0 @@
1#!/usr/bin/awk -f
2
3#
4# Converts output of `ipmitool sensor` to prometheus format.
5#
6# With GNU awk:
7# ipmitool sensor | ./ipmitool > ipmitool.prom
8#
9# With BSD awk:
10# ipmitool sensor | awk -f ./ipmitool > ipmitool.prom
11#
12
13function export(values, name) {
14 if (values["metric_count"] < 1) {
15 return
16 }
17 delete values["metric_count"]
18
19 printf("# HELP %s%s %s sensor reading from ipmitool\n", namespace, name, help[name]);
20 printf("# TYPE %s%s gauge\n", namespace, name);
21 for (sensor in values) {
22 printf("%s%s{sensor=\"%s\"} %f\n", namespace, name, sensor, values[sensor]);
23 }
24}
25
26# Fields are Bar separated, with space padding.
27BEGIN {
28 FS = "[ ]*[|][ ]*";
29 namespace = "node_ipmi_";
30
31 # Friendly description of the type of sensor for HELP.
32 help["temperature_celsius"] = "Temperature";
33 help["volts"] = "Voltage";
34 help["power_watts"] = "Power";
35 help["speed_rpm"] = "Fan";
36 help["status"] = "Chassis status";
37
38 temperature_celsius["metric_count"] = 0;
39 volts["metric_count"] = 0;
40 power_watts["metric_count"] = 0;
41 speed_rpm["metric_count"] = 0;
42 status["metric_count"] = 0;
43}
44
45# Not a valid line.
46{
47 if (NF < 3) {
48 next
49 }
50}
51
52# $2 is value field.
53$2 ~ /na/ {
54 next
55}
56
57# $3 is type field.
58$3 ~ /degrees C/ {
59 temperature_celsius[$1] = $2;
60 temperature_celsius["metric_count"]++;
61}
62
63$3 ~ /Volts/ {
64 volts[$1] = $2;
65 volts["metric_count"]++;
66}
67
68$3 ~ /Watts/ {
69 power_watts[$1] = $2;
70 power_watts["metric_count"]++;
71}
72
73$3 ~ /RPM/ {
74 speed_rpm[$1] = $2;
75 speed_rpm["metric_count"]++;
76}
77
78$3 ~ /discrete/ {
79 status[$1] = sprintf("%d", substr($2,3,2));
80 status["metric_count"]++;
81}
82
83END {
84 export(temperature_celsius, "temperature_celsius");
85 export(volts, "volts");
86 export(power_watts, "power_watts");
87 export(speed_rpm, "speed_rpm");
88 export(status, "status");
89}
diff --git a/text_collector_examples/md_info.sh b/text_collector_examples/md_info.sh
deleted file mode 100755
index c89f10f..0000000
--- a/text_collector_examples/md_info.sh
+++ /dev/null
@@ -1,56 +0,0 @@
1#!/usr/bin/env bash
2set -eu
3
4for MD_DEVICE in /dev/md/*; do
5 # Subshell to avoid eval'd variables from leaking between iterations
6 (
7 # Resolve symlink to discover device, e.g. /dev/md127
8 MD_DEVICE_NUM=$(readlink -f "${MD_DEVICE}")
9
10 # Remove /dev/ prefix
11 MD_DEVICE_NUM=${MD_DEVICE_NUM#/dev/}
12 MD_DEVICE=${MD_DEVICE#/dev/md/}
13
14 # Query sysfs for info about md device
15 SYSFS_BASE="/sys/devices/virtual/block/${MD_DEVICE_NUM}/md"
16 MD_LAYOUT=$(cat "${SYSFS_BASE}/layout")
17 MD_LEVEL=$(cat "${SYSFS_BASE}/level")
18 MD_METADATA_VERSION=$(cat "${SYSFS_BASE}/metadata_version")
19 MD_NUM_RAID_DISKS=$(cat "${SYSFS_BASE}/raid_disks")
20
21 # Remove 'raid' prefix from RAID level
22 MD_LEVEL=${MD_LEVEL#raid}
23
24 # Output disk metrics
25 for RAID_DISK in ${SYSFS_BASE}/rd[0-9]*; do
26 DISK=$(readlink -f "${RAID_DISK}/block")
27 DISK_DEVICE=$(basename "${DISK}")
28 RAID_DISK_DEVICE=$(basename "${RAID_DISK}")
29 RAID_DISK_INDEX=${RAID_DISK_DEVICE#rd}
30 RAID_DISK_STATE=$(cat "${RAID_DISK}/state")
31
32 DISK_SET=""
33 # Determine disk set using logic from mdadm: https://github.com/neilbrown/mdadm/commit/2c096ebe4b
34 if [[ ${RAID_DISK_STATE} == "in_sync" && ${MD_LEVEL} == 10 && $((MD_LAYOUT & ~0x1ffff)) ]]; then
35 NEAR_COPIES=$((MD_LAYOUT & 0xff))
36 FAR_COPIES=$(((MD_LAYOUT >> 8) & 0xff))
37 COPIES=$((NEAR_COPIES * FAR_COPIES))
38
39 if [[ $((MD_NUM_RAID_DISKS % COPIES == 0)) && $((COPIES <= 26)) ]]; then
40 DISK_SET=$((RAID_DISK_INDEX % COPIES))
41 fi
42 fi
43
44 echo -n "node_md_disk_info{disk_device=\"${DISK_DEVICE}\", md_device=\"${MD_DEVICE_NUM}\""
45 if [[ -n ${DISK_SET} ]]; then
46 SET_LETTERS=({A..Z})
47 echo -n ", md_set=\"${SET_LETTERS[${DISK_SET}]}\""
48 fi
49 echo "} 1"
50 done
51
52 # Output RAID array metrics
53 # NOTE: Metadata version is a label rather than a separate metric because the version can be a string
54 echo "node_md_info{md_device=\"${MD_DEVICE_NUM}\", md_name=\"${MD_DEVICE}\", raid_level=\"${MD_LEVEL}\", md_metadata_version=\"${MD_METADATA_VERSION}\"} 1"
55 )
56done
diff --git a/text_collector_examples/md_info_detail.sh b/text_collector_examples/md_info_detail.sh
deleted file mode 100755
index 9806ebb..0000000
--- a/text_collector_examples/md_info_detail.sh
+++ /dev/null
@@ -1,87 +0,0 @@
1#!/usr/bin/env bash
2# Note: This script uses "mdadm --detail" to get some of the metrics, so it must be run as root.
3# It is designed to be run periodically in a cronjob, and output to /var/lib/node_exporter/textfile_collector/md_info_detail.prom
4# $ cat /etc/cron.d/prometheus_md_info_detail
5# * * * * * bash /var/lib/node_exporter/md_info_detail.sh > /var/lib/node_exporter/md_info_detail.prom.$$ && mv /var/lib/node_exporter/md_info_detail.prom.$$ /var/lib/node_exporter/md_info_detail.prom
6
7set -eu
8
9for MD_DEVICE in /dev/md/*; do
10 # Subshell to avoid eval'd variables from leaking between iterations
11 (
12 # Resolve symlink to discover device, e.g. /dev/md127
13 MD_DEVICE_NUM=$(readlink -f "${MD_DEVICE}")
14
15 # Remove /dev/ prefix
16 MD_DEVICE_NUM=${MD_DEVICE_NUM#/dev/}
17 MD_DEVICE=${MD_DEVICE#/dev/md/}
18
19 # Query sysfs for info about md device
20 SYSFS_BASE="/sys/devices/virtual/block/${MD_DEVICE_NUM}/md"
21 MD_LAYOUT=$(cat "${SYSFS_BASE}/layout")
22 MD_LEVEL=$(cat "${SYSFS_BASE}/level")
23 MD_METADATA_VERSION=$(cat "${SYSFS_BASE}/metadata_version")
24 MD_NUM_RAID_DISKS=$(cat "${SYSFS_BASE}/raid_disks")
25
26 # Remove 'raid' prefix from RAID level
27 MD_LEVEL=${MD_LEVEL#raid}
28
29 # Output disk metrics
30 for RAID_DISK in ${SYSFS_BASE}/rd[0-9]*; do
31 DISK=$(readlink -f "${RAID_DISK}/block")
32 DISK_DEVICE=$(basename "${DISK}")
33 RAID_DISK_DEVICE=$(basename "${RAID_DISK}")
34 RAID_DISK_INDEX=${RAID_DISK_DEVICE#rd}
35 RAID_DISK_STATE=$(cat "${RAID_DISK}/state")
36
37 DISK_SET=""
38 # Determine disk set using logic from mdadm: https://github.com/neilbrown/mdadm/commit/2c096ebe4b
39 if [[ ${RAID_DISK_STATE} == "in_sync" && ${MD_LEVEL} == 10 && $((MD_LAYOUT & ~0x1ffff)) ]]; then
40 NEAR_COPIES=$((MD_LAYOUT & 0xff))
41 FAR_COPIES=$(((MD_LAYOUT >> 8) & 0xff))
42 COPIES=$((NEAR_COPIES * FAR_COPIES))
43
44 if [[ $((MD_NUM_RAID_DISKS % COPIES == 0)) && $((COPIES <= 26)) ]]; then
45 DISK_SET=$((RAID_DISK_INDEX % COPIES))
46 fi
47 fi
48
49 echo -n "node_md_disk_info{disk_device=\"${DISK_DEVICE}\", md_device=\"${MD_DEVICE_NUM}\""
50 if [[ -n ${DISK_SET} ]]; then
51 SET_LETTERS=({A..Z})
52 echo -n ", md_set=\"${SET_LETTERS[${DISK_SET}]}\""
53 fi
54 echo "} 1"
55 done
56
57 # Get output from mdadm --detail (Note: root/sudo required)
58 MDADM_DETAIL_OUTPUT=$(mdadm --detail /dev/"${MD_DEVICE_NUM}")
59
60 # Output RAID "Devices", "Size" and "Event" metrics, from the output of "mdadm --detail"
61 while IFS= read -r line ; do
62 # Filter out these keys that have numeric values that increment up
63 if echo "$line" | grep -E -q "Devices :|Array Size :| Used Dev Size :|Events :"; then
64 MDADM_DETAIL_KEY=$(echo "$line" | cut -d ":" -f 1 | tr -cd '[a-zA-Z0-9]._-')
65 MDADM_DETAIL_VALUE=$(echo "$line" | cut -d ":" -f 2 | cut -d " " -f 2 | sed 's:^ ::')
66 echo "node_md_info_${MDADM_DETAIL_KEY}{md_device=\"${MD_DEVICE_NUM}\", md_name=\"${MD_DEVICE}\", raid_level=\"${MD_LEVEL}\", md_num_raid_disks=\"${MD_NUM_RAID_DISKS}\", md_metadata_version=\"${MD_METADATA_VERSION}\"} ${MDADM_DETAIL_VALUE}"
67 fi
68 done <<< "$MDADM_DETAIL_OUTPUT"
69
70 # Output RAID detail metrics info from the output of "mdadm --detail"
71 # NOTE: Sending this info as labels rather than separate metrics, because some of them can be strings.
72 echo -n "node_md_info{md_device=\"${MD_DEVICE_NUM}\", md_name=\"${MD_DEVICE}\", raid_level=\"${MD_LEVEL}\", md_num_raid_disks=\"${MD_NUM_RAID_DISKS}\", md_metadata_version=\"${MD_METADATA_VERSION}\""
73 while IFS= read -r line ; do
74 # Filter for lines with a ":", to use for Key/Value pairs in labels
75 if echo "$line" | grep -E -q ":" ; then
76 # Exclude lines with these keys, as they're values are numbers that increment up and captured in individual metrics above
77 if echo "$line" | grep -E -qv "Array Size|Used Dev Size|Events|Update Time" ; then
78 echo -n ", "
79 MDADM_DETAIL_KEY=$(echo "$line" | cut -d ":" -f 1 | tr -cd '[a-zA-Z0-9]._-')
80 MDADM_DETAIL_VALUE=$(echo "$line" | cut -d ":" -f 2- | sed 's:^ ::')
81 echo -n "${MDADM_DETAIL_KEY}=\"${MDADM_DETAIL_VALUE}\""
82 fi
83 fi
84 done <<< "$MDADM_DETAIL_OUTPUT"
85 echo "} 1"
86 )
87done
diff --git a/text_collector_examples/mellanox_hca_temp b/text_collector_examples/mellanox_hca_temp
deleted file mode 100755
index 0a9e2b0..0000000
--- a/text_collector_examples/mellanox_hca_temp
+++ /dev/null
@@ -1,59 +0,0 @@
1#!/bin/bash
2set -eu
3
4# Script to read Mellanox HCA temperature using the Mellanox mget_temp_ext tool
5
6# Copyright 2018 The Prometheus Authors
7#
8# Licensed under the Apache License, Version 2.0 (the "License");
9# you may not use this file except in compliance with the License.
10# You may obtain a copy of the License at
11#
12# http://www.apache.org/licenses/LICENSE-2.0
13#
14# Unless required by applicable law or agreed to in writing, software
15# distributed under the License is distributed on an "AS IS" BASIS,
16# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17# See the License for the specific language governing permissions and
18# limitations under the License.
19#
20# Author: Jan Phillip Greimann <jan.greimann@cloud.ionos.com>
21
22# check if root
23if [ "$EUID" -ne 0 ]; then
24 echo "${0##*/}: Please run as root!" >&2
25 exit 1
26fi
27
28# check if programs are installed
29if ! command -v mget_temp_ext >/dev/null 2>&1; then
30 echo "${0##*/}: mget_temp_ext is not installed. Aborting." >&2
31 exit 1
32fi
33
34cat <<EOF
35# HELP node_infiniband_hca_temp_celsius Celsius temperature of Mellanox InfiniBand HCA.
36# TYPE node_infiniband_hca_temp_celsius gauge
37EOF
38
39# run for each found Mellanox device
40for dev in /sys/class/infiniband/*; do
41 if test ! -d "$dev"; then
42 continue
43 fi
44 device="${dev##*/}"
45
46 # get temperature
47 if temperature="$(mget_temp_ext -d "${device}")"; then
48 # output
49 echo "node_infiniband_hca_temp_celsius{hca_device=\"${device}\"} ${temperature//[[:space:]]/}"
50 else
51 echo "${0##*/}: Failed to get temperature from InfiniBand HCA '${device}'!" >&2
52 fi
53done
54
55# if device is empty, no device was found
56if [ -z "${device-}" ]; then
57 echo "${0##*/}: No InfiniBand HCA device found!" >&2
58 exit 1
59fi
diff --git a/text_collector_examples/multipathd_info b/text_collector_examples/multipathd_info
deleted file mode 100755
index cddbb2b..0000000
--- a/text_collector_examples/multipathd_info
+++ /dev/null
@@ -1,9 +0,0 @@
1#!/bin/sh
2#
3# Description: Expose device mapper multipathing metrics from multipathd.
4#
5# Author: Saket Sinha <saket.sinha@cloud.ionos.com>
6
7echo '# HELP node_dmpath_info State info for dev-mapper path'
8echo '# TYPE node_dmpath_info gauge'
9/sbin/multipathd show paths format '%d %t %T' | /usr/bin/awk '{ if ( NR > 1) {print "node_dmpath_info{device=\""$1"\"," "dm_path_state=\""$2"\"," "path_state=\""$3"\"}" " 1"}}'
diff --git a/text_collector_examples/ntpd_metrics.py b/text_collector_examples/ntpd_metrics.py
deleted file mode 100755
index ab55a13..0000000
--- a/text_collector_examples/ntpd_metrics.py
+++ /dev/null
@@ -1,122 +0,0 @@
1#!/usr/bin/env python3
2#
3# Description: Extract NTPd metrics from ntpq -np.
4# Author: Ben Kochie <superq@gmail.com>
5
6import re
7import subprocess
8import sys
9
10# NTP peers status, with no DNS lookups.
11ntpq_cmd = ['ntpq', '-np']
12ntpq_rv_cmd = ['ntpq', '-c', 'rv 0 offset,sys_jitter,rootdisp,rootdelay']
13
14# Regex to match all of the fields in the output of ntpq -np
15metrics_fields = [
16 '^(?P<status>.)(?P<remote>[\w\.]+)',
17 '(?P<refid>[\w\.]+)',
18 '(?P<stratum>\d+)',
19 '(?P<type>\w)',
20 '(?P<when>\d+)',
21 '(?P<poll>\d+)',
22 '(?P<reach>\d+)',
23 '(?P<delay>\d+\.\d+)',
24 '(?P<offset>-?\d+\.\d+)',
25 '(?P<jitter>\d+\.\d+)',
26]
27metrics_re = '\s+'.join(metrics_fields)
28
29# Remote types
30# http://support.ntp.org/bin/view/Support/TroubleshootingNTP
31remote_types = {
32 'l': 'local',
33 'u': 'unicast',
34 'm': 'multicast',
35 'b': 'broadcast',
36 '-': 'netaddr',
37}
38
39# Status codes:
40# http://www.eecis.udel.edu/~mills/ntp/html/decode.html#peer
41status_types = {
42 ' ': 0,
43 'x': 1,
44 '.': 2,
45 '-': 3,
46 '+': 4,
47 '#': 5,
48 '*': 6,
49 'o': 7,
50}
51
52
53# Run the ntpq command.
54def get_output(command):
55 try:
56 output = subprocess.check_output(command, stderr=subprocess.DEVNULL)
57 except subprocess.CalledProcessError as e:
58 return None
59 return output.decode()
60
61
62# Print metrics in Prometheus format.
63def print_prometheus(metric, values):
64 print("# HELP ntpd_%s NTPd metric for %s" % (metric, metric))
65 print("# TYPE ntpd_%s gauge" % (metric))
66 for labels in values:
67 if labels is None:
68 print("ntpd_%s %f" % (metric, values[labels]))
69 else:
70 print("ntpd_%s{%s} %f" % (metric, labels, values[labels]))
71
72
73# Parse raw ntpq lines.
74def parse_line(line):
75 if re.match('\s+remote\s+refid', line):
76 return None
77 if re.match('=+', line):
78 return None
79 if re.match('.+\.(LOCL|POOL)\.', line):
80 return None
81 if re.match('^$', line):
82 return None
83 return re.match(metrics_re, line)
84
85
86# Main function
87def main(argv):
88 ntpq = get_output(ntpq_cmd)
89 peer_status_metrics = {}
90 delay_metrics = {}
91 offset_metrics = {}
92 jitter_metrics = {}
93 for line in ntpq.split('\n'):
94 metric_match = parse_line(line)
95 if metric_match is None:
96 continue
97 remote = metric_match.group('remote')
98 refid = metric_match.group('refid')
99 stratum = metric_match.group('stratum')
100 remote_type = remote_types[metric_match.group('type')]
101 common_labels = "remote=\"%s\",reference=\"%s\"" % (remote, refid)
102 peer_labels = "%s,stratum=\"%s\",type=\"%s\"" % (common_labels, stratum, remote_type)
103
104 peer_status_metrics[peer_labels] = float(status_types[metric_match.group('status')])
105 delay_metrics[common_labels] = float(metric_match.group('delay'))
106 offset_metrics[common_labels] = float(metric_match.group('offset'))
107 jitter_metrics[common_labels] = float(metric_match.group('jitter'))
108
109 print_prometheus('peer_status', peer_status_metrics)
110 print_prometheus('delay_milliseconds', delay_metrics)
111 print_prometheus('offset_milliseconds', offset_metrics)
112 print_prometheus('jitter_milliseconds', jitter_metrics)
113
114 ntpq_rv = get_output(ntpq_rv_cmd)
115 for metric in ntpq_rv.split(','):
116 metric_name, metric_value = metric.strip().split('=')
117 print_prometheus(metric_name, {None: float(metric_value)})
118
119
120# Go go go!
121if __name__ == "__main__":
122 main(sys.argv[1:])
diff --git a/text_collector_examples/nvme_metrics.sh b/text_collector_examples/nvme_metrics.sh
deleted file mode 100755
index 5cc23cf..0000000
--- a/text_collector_examples/nvme_metrics.sh
+++ /dev/null
@@ -1,97 +0,0 @@
1#!/usr/bin/env bash
2set -eu
3
4# Dependencies: nvme-cli, jq (packages)
5# Based on code from
6# - https://github.com/prometheus/node_exporter/blob/master/text_collector_examples/smartmon.sh
7# - https://github.com/prometheus/node_exporter/blob/master/text_collector_examples/mellanox_hca_temp
8# - https://github.com/vorlon/check_nvme/blob/master/check_nvme.sh
9#
10# Author: Henk <henk@wearespindle.com>
11
12# Check if we are root
13if [ "$EUID" -ne 0 ]; then
14 echo "${0##*/}: Please run as root!" >&2
15 exit 1
16fi
17
18# Check if programs are installed
19if ! command -v nvme >/dev/null 2>&1; then
20 echo "${0##*/}: nvme is not installed. Aborting." >&2
21 exit 1
22fi
23
24output_format_awk="$(
25 cat <<'OUTPUTAWK'
26BEGIN { v = "" }
27v != $1 {
28 print "# HELP nvme_" $1 " SMART metric " $1;
29 if ($1 ~ /_total$/)
30 print "# TYPE nvme_" $1 " counter";
31 else
32 print "# TYPE nvme_" $1 " gauge";
33 v = $1
34}
35{print "nvme_" $0}
36OUTPUTAWK
37)"
38
39format_output() {
40 sort | awk -F'{' "${output_format_awk}"
41}
42
43# Get the nvme-cli version
44nvme_version="$(nvme version | awk '$1 == "nvme" {print $3}')"
45echo "nvmecli{version=\"${nvme_version}\"} 1" | format_output
46
47# Get devices
48device_list="$(nvme list | awk '/^\/dev/{print $1}')"
49
50# Loop through the NVMe devices
51for device in ${device_list}; do
52 json_check="$(nvme smart-log -o json "${device}")"
53 disk="$(echo "${device}" | cut -c6-10)"
54
55 # The temperature value in JSON is in Kelvin, we want Celsius
56 value_temperature="$(echo "$json_check" | jq '.temperature - 273')"
57 echo "temperature_celcius{device=\"${disk}\"} ${value_temperature}"
58
59 value_available_spare="$(echo "$json_check" | jq '.avail_spare / 100')"
60 echo "available_spare_ratio{device=\"${disk}\"} ${value_available_spare}"
61
62 value_available_spare_threshold="$(echo "$json_check" | jq '.spare_thresh / 100')"
63 echo "available_spare_threshold_ratio{device=\"${disk}\"} ${value_available_spare_threshold}"
64
65 value_percentage_used="$(echo "$json_check" | jq '.percent_used / 100')"
66 echo "percentage_used_ratio{device=\"${disk}\"} ${value_percentage_used}"
67
68 value_critical_warning="$(echo "$json_check" | jq '.critical_warning')"
69 echo "critical_warning_total{device=\"${disk}\"} ${value_critical_warning}"
70
71 value_media_errors="$(echo "$json_check" | jq '.media_errors')"
72 echo "media_errors_total{device=\"${disk}\"} ${value_media_errors}"
73
74 value_num_err_log_entries="$(echo "$json_check" | jq '.num_err_log_entries')"
75 echo "num_err_log_entries_total{device=\"${disk}\"} ${value_num_err_log_entries}"
76
77 value_power_cycles="$(echo "$json_check" | jq '.power_cycles')"
78 echo "power_cycles_total{device=\"${disk}\"} ${value_power_cycles}"
79
80 value_power_on_hours="$(echo "$json_check" | jq '.power_on_hours')"
81 echo "power_on_hours_total{device=\"${disk}\"} ${value_power_on_hours}"
82
83 value_controller_busy_time="$(echo "$json_check" | jq '.controller_busy_time')"
84 echo "controller_busy_time_seconds{device=\"${disk}\"} ${value_controller_busy_time}"
85
86 value_data_units_written="$(echo "$json_check" | jq '.data_units_written')"
87 echo "data_units_written_total{device=\"${disk}\"} ${value_data_units_written}"
88
89 value_data_units_read="$(echo "$json_check" | jq '.data_units_read')"
90 echo "data_units_read_total{device=\"${disk}\"} ${value_data_units_read}"
91
92 value_host_read_commands="$(echo "$json_check" | jq '.host_read_commands')"
93 echo "host_read_commands_total{device=\"${disk}\"} ${value_host_read_commands}"
94
95 value_host_write_commands="$(echo "$json_check" | jq '.host_write_commands')"
96 echo "host_write_commands_total{device=\"${disk}\"} ${value_host_write_commands}"
97done | format_output
diff --git a/text_collector_examples/pacman.sh b/text_collector_examples/pacman.sh
deleted file mode 100755
index 82ac4cf..0000000
--- a/text_collector_examples/pacman.sh
+++ /dev/null
@@ -1,33 +0,0 @@
#!/bin/bash
#
# Description: Expose metrics from pacman updates.
#
# If installed, the bash script *checkupdates*, included with the
# *pacman-contrib* package, is used to calculate the number of pending updates.
# Otherwise *pacman* is used for calculation.
#
# Author: Sven Haardiek <sven@haardiek.de>

set -o errexit
set -o nounset
set -o pipefail

if [ -x /usr/bin/checkupdates ]
then
  # checkupdates syncs a throwaway DB copy, so the count is current.
  updates=$(/usr/bin/checkupdates | wc -l)
  cache=0
else
  # pacman -Qu uses the cached sync DB and exits non-zero when there are
  # no pending updates, hence the explicit fallback to 0.
  if ! updates=$(/usr/bin/pacman -Qu | wc -l)
  then
    updates=0
  fi
  cache=1
fi

# BUG FIX: the HELP/TYPE headers previously advertised "updates_pending"
# while the sample line was named "pacman_updates_pending"; the names must
# match for the exposition metadata to apply to the sample.
echo "# HELP pacman_updates_pending number of pending updates from pacman"
echo "# TYPE pacman_updates_pending gauge"
echo "pacman_updates_pending $updates"

echo "# HELP pacman_updates_pending_from_cache pending updates information are from cache"
echo "# TYPE pacman_updates_pending_from_cache gauge"
echo "pacman_updates_pending_from_cache $cache"
diff --git a/text_collector_examples/smartmon.py b/text_collector_examples/smartmon.py
deleted file mode 100755
index 7dbf26e..0000000
--- a/text_collector_examples/smartmon.py
+++ /dev/null
@@ -1,378 +0,0 @@
1#!/usr/bin/env python3
2import argparse
3import collections
4import csv
5import datetime
6import decimal
7import re
8import shlex
9import subprocess
10
# Matches "Key: value" and "Key is value" lines produced by
# `smartctl --info`; group "k" is the key, group "v" the value.
device_info_re = re.compile(r'^(?P<k>[^:]+?)(?:(?:\sis|):)\s*(?P<v>.*)$')

# Matches an entry header in the `smartctl -l xerror` extended error log;
# group 1 is the error number of the entry.
ata_error_count_re = re.compile(
    r'^Error (\d+) \[\d+\] occurred', re.MULTILINE)

# Matches the overall self-assessment verdict line from `smartctl --health`
# (ATA devices report "PASSED", SCSI devices report "OK").
self_test_re = re.compile(r'^SMART.*(PASSED|OK)$', re.MULTILINE)

# Maps `smartctl --info` keys to the label names used on device_info.
device_info_map = {
    'Vendor': 'vendor',
    'Product': 'product',
    'Revision': 'revision',
    'Logical Unit id': 'lun_id',
    'Model Family': 'model_family',
    'Device Model': 'device_model',
    'Serial Number': 'serial_number',
    'Firmware Version': 'firmware_version',
}

# Lower-cased SMART attribute names that are exported as metrics; any other
# attribute found in smartctl output is silently ignored.
smart_attributes_whitelist = {
    'airflow_temperature_cel',
    'command_timeout',
    'current_pending_sector',
    'end_to_end_error',
    'erase_fail_count_total',
    'g_sense_error_rate',
    'hardware_ecc_recovered',
    'host_reads_mib',
    'host_reads_32mib',
    'host_writes_mib',
    'host_writes_32mib',
    'load_cycle_count',
    'media_wearout_indicator',
    'wear_leveling_count',
    'nand_writes_1gib',
    'offline_uncorrectable',
    'power_cycle_count',
    'power_on_hours',
    'program_fail_count',
    'raw_read_error_rate',
    'reallocated_event_count',
    'reallocated_sector_ct',
    'reported_uncorrect',
    'sata_downshift_count',
    'seek_error_rate',
    'spin_retry_count',
    'spin_up_time',
    'start_stop_count',
    'temperature_case',
    'temperature_celsius',
    'temperature_internal',
    'total_lbas_read',
    'total_lbas_written',
    'udma_crc_error_count',
    'unsafe_shutdown_count',
    'workld_host_reads_perc',
    'workld_media_wear_indic',
    'workload_minutes',
}

# A single exported sample: metric name, label dict and value.
Metric = collections.namedtuple('Metric', 'name labels value')

# One row of the `smartctl --attributes` table; raw_value is the restkey,
# so it collects all trailing columns as a list.
SmartAttribute = collections.namedtuple('SmartAttribute', [
    'id', 'name', 'flag', 'value', 'worst', 'threshold', 'type', 'updated',
    'when_failed', 'raw_value',
])
76
77
class Device(collections.namedtuple('DeviceBase', 'path opts')):
    """A single device reported by ``smartctl --scan``.

    Fields:
        path: Device node, e.g. ``/dev/sda``.
        opts: Parsed scan options; ``opts.type`` holds the ``-d`` value.
    """

    @property
    def type(self):
        """The smartctl device type (value of the ``-d`` scan option)."""
        return self.opts.type

    @property
    def base_labels(self):
        """Labels attached to every metric emitted for this device."""
        return dict(disk=self.path)

    def smartctl_select(self):
        """Command-line arguments addressing this device in smartctl."""
        return ['--device', self.type, self.path]
91
92
def metric_key(metric, prefix=''):
    """Return the fully qualified metric name: *prefix* + the metric's name."""
    return '{0}{1}'.format(prefix, metric.name)
95
96
def metric_format(metric, prefix=''):
    """Render *metric* as one Prometheus exposition line: name{labels} value."""
    rendered_labels = ','.join(
        '{0}="{1}"'.format(name, val) for name, val in metric.labels.items())
    numeric_value = decimal.Decimal(metric.value)
    return '{0}{{{1}}} {2}'.format(
        metric_key(metric, prefix), rendered_labels, numeric_value)
105
106
def metric_print_meta(metric, prefix=''):
    """Print the # HELP / # TYPE header lines for *metric*."""
    key = metric_key(metric, prefix)
    print('# HELP {0} SMART metric {1}'.format(key, metric.name))
    print('# TYPE {0} gauge'.format(key))
112
113
def metric_print(metric, prefix=''):
    """Print the exposition line for *metric*."""
    line = metric_format(metric, prefix)
    print(line)
116
117
def smart_ctl(*args, check=True):
    """Invoke the smartctl binary with *args* and return its stdout as text.

    Args:
        *args: Arguments passed through to smartctl.
        check: Whether a non-zero exit status raises inside subprocess.run.

    Returns:
        (str) Data piped to stdout by the smartctl subprocess. Note that on
        a non-zero exit (with check=True) the captured stdout is still
        returned instead of the exception propagating to the caller.
    """
    command = ['smartctl'] + list(args)
    try:
        completed = subprocess.run(command, stdout=subprocess.PIPE, check=check)
    except subprocess.CalledProcessError as err:
        return err.output.decode('utf-8')
    return completed.stdout.decode('utf-8')
130
def smart_ctl_version():
    """Return the smartctl version (second token of `-V`'s first line)."""
    first_line = smart_ctl('-V').split('\n')[0]
    return first_line.split()[1]
133
134
def find_devices():
    """Find SMART devices.

    Parses `smartctl --scan-open` output, where each line is a device path
    followed by scan options (e.g. "-d sat").

    Yields:
        (Device) Single device found by smartctl.
    """
    option_parser = argparse.ArgumentParser()
    option_parser.add_argument('-d', '--device', dest='type')

    scan_output = smart_ctl('--scan-open')
    for raw_line in scan_output.split('\n'):
        raw_line = raw_line.strip()
        if not raw_line:
            continue
        # shlex strips the trailing "# ..." comment smartctl appends.
        fields = shlex.split(raw_line, comments=True)
        if fields:
            yield Device(fields[0], option_parser.parse_args(fields[1:]))
156
157
def device_is_active(device):
    """Returns whenever the given device is currently active or not.

    Args:
        device: (Device) Device in question.

    Returns:
        (bool) True if the device is active and False otherwise.
    """
    # BUG FIX: smart_ctl() catches CalledProcessError internally and returns
    # the captured output instead of re-raising, so the previous try/except
    # around smart_ctl() could never fire and this function always returned
    # True. Invoke smartctl directly so the exit status (non-zero when the
    # disk is in standby) is actually observed.
    try:
        subprocess.run(
            ['smartctl', '--nocheck', 'standby', *device.smartctl_select()],
            stdout=subprocess.PIPE, check=True)
    except subprocess.CalledProcessError:
        return False

    return True
173
174
def device_info(device):
    """Query device for basic model information.

    Args:
        device: (Device) Device in question.

    Returns:
        (generator): Generator yielding (key, value) string pairs parsed
        from the `smartctl --info` output.
    """
    raw_output = smart_ctl('--info', *device.smartctl_select())
    # Drop the three-line smartctl banner before the key/value section.
    info_lines = raw_output.strip().split('\n')[3:]

    parsed = (device_info_re.match(line) for line in info_lines)
    return (match.groups() for match in parsed if match is not None)
193
194
def device_smart_capabilities(device):
    """Returns SMART capabilities of the given device.

    Args:
        device: (Device) Device in question.

    Returns:
        (tuple): tuple containing:

            (bool): True whenever SMART is available, False otherwise.
            (bool): True whenever SMART is enabled, False otherwise.
    """
    # Collect the first word of every "SMART support" value, e.g.
    # "Available - device has SMART capability." -> "Available".
    support_states = set()
    for key, value in device_info(device):
        if key == 'SMART support':
            support_states.add(value.split(' ', 1)[0])

    return 'Available' in support_states, 'Enabled' in support_states
217
218
def collect_device_info(device):
    """Collect basic device information.

    Args:
        device: (Device) Device in question.

    Yields:
        (Metric) metrics describing general device information.
    """
    values = dict(device_info(device))
    labels = dict(device.base_labels)
    # Attach every known info field that smartctl actually reported.
    for source_key, label_name in device_info_map.items():
        if source_key in values:
            labels[label_name] = values[source_key]
    yield Metric('device_info', labels, True)
233
234
def collect_device_health_self_assessment(device):
    """Collect metric about the device health self assessment.

    Args:
        device: (Device) Device in question.

    Yields:
        (Metric) Device health self assessment.
    """
    health_output = smart_ctl('--health', *device.smartctl_select())
    # The verdict line reads "PASSED" (ATA) or "OK" (SCSI) when healthy.
    passed = self_test_re.search(health_output) is not None
    yield Metric('device_smart_healthy', device.base_labels, passed)
253
254
def collect_ata_metrics(device):
    """Yield value/worst/threshold metrics for whitelisted SMART attributes.

    Args:
        device: (Device) Device in question.

    Yields:
        (Metric) One metric per (attribute, column) pair.
    """
    # Fetch SMART attributes for the given device.
    attributes = smart_ctl(
        '--attributes', *device.smartctl_select()
    )

    # replace multiple occurrences of whitespace with a single whitespace
    # so that the CSV Parser recognizes individual columns properly.
    attributes = re.sub(r'[\t\x20]+', ' ', attributes)

    # Turn smartctl output into a list of lines and skip to the table of
    # SMART attributes.
    attribute_lines = attributes.strip().split('\n')[7:]

    reader = csv.DictReader(
        (l.strip() for l in attribute_lines),
        fieldnames=SmartAttribute._fields[:-1],
        restkey=SmartAttribute._fields[-1], delimiter=' ')
    for entry in reader:
        # We're only interested in the SMART attributes that are
        # whitelisted here.
        entry['name'] = entry['name'].lower()
        if entry['name'] not in smart_attributes_whitelist:
            continue

        # Ensure that only the numeric parts are fetched from the raw_value.
        # Attributes such as 194 Temperature_Celsius reported by my SSD
        # are in the format of "36 (Min/Max 24/40)" which can't be expressed
        # properly as a prometheus metric.
        # BUG FIX: the pattern is now a raw string; '\d' in a plain string
        # literal is an invalid escape sequence on modern Python.
        m = re.match(r'^(\d+)', ' '.join(entry['raw_value']))
        if not m:
            continue
        entry['raw_value'] = m.group(1)

        # The whitelist membership was already verified above, so the old
        # redundant second check has been removed; the unused "name=" kwarg
        # in the format() call below is gone as well.
        labels = {
            'name': entry['name'],
            **device.base_labels,
        }

        for col in 'value', 'worst', 'threshold':
            yield Metric('attr_{col}'.format(col=col), labels, entry[col])
299
300
def collect_ata_error_count(device):
    """Inspect the device error log and report the amount of entries.

    Args:
        device: (Device) Device in question.

    Yields:
        (Metric) Device error count.
    """
    # check=False: smartctl exits non-zero when the log contains errors.
    log_output = smart_ctl(
        '-l', 'xerror,1', *device.smartctl_select(), check=False)

    match = ata_error_count_re.search(log_output)
    count = 0 if match is None else match.group(1)

    yield Metric('device_errors', device.base_labels, count)
318
319
def collect_disks_smart_metrics():
    """Walk every scanned device and yield all its SMART metrics."""
    scrape_time = int(datetime.datetime.utcnow().timestamp())

    for device in find_devices():
        yield Metric('smartctl_run', device.base_labels, scrape_time)

        is_active = device_is_active(device)
        yield Metric('device_active', device.base_labels, is_active)
        # Stop here for inactive disks so the collection itself does not
        # spin them up.
        if not is_active:
            continue

        yield from collect_device_info(device)

        smart_available, smart_enabled = device_smart_capabilities(device)
        yield Metric(
            'device_smart_available', device.base_labels, smart_available)
        yield Metric(
            'device_smart_enabled', device.base_labels, smart_enabled)
        # Without SMART support any further smartctl invocation would fail
        # anyway, so skip the remaining collectors.
        if not smart_available:
            continue

        yield from collect_device_health_self_assessment(device)

        # ATA-specific tables only exist for SAT-type devices.
        if device.type.startswith('sat'):
            yield from collect_ata_metrics(device)
            yield from collect_ata_error_count(device)
356
357
def main():
    """Emit all smartmon metrics in Prometheus text exposition format."""
    version_metric = Metric(
        'smartctl_version', {'version': smart_ctl_version()}, True)
    metric_print_meta(version_metric, 'smartmon_')
    metric_print(version_metric, 'smartmon_')

    # Sort by name so each metric family is contiguous and its HELP/TYPE
    # header is printed exactly once.
    all_metrics = sorted(collect_disks_smart_metrics(), key=lambda m: m.name)

    last_name = None
    for metric in all_metrics:
        if metric.name != last_name:
            metric_print_meta(metric, 'smartmon_')
            last_name = metric.name
        metric_print(metric, 'smartmon_')
376
# Script entry point: print all metrics to stdout for the textfile collector.
if __name__ == '__main__':
    main()
diff --git a/text_collector_examples/smartmon.sh b/text_collector_examples/smartmon.sh
deleted file mode 100755
index 8a75d29..0000000
--- a/text_collector_examples/smartmon.sh
+++ /dev/null
@@ -1,194 +0,0 @@
1#!/bin/bash
2# Script informed by the collectd monitoring script for smartmontools (using smartctl)
3# by Samuel B. <samuel_._behan_(at)_dob_._sk> (c) 2012
4# source at: http://devel.dob.sk/collectd-scripts/
5
6# TODO: This probably needs to be a little more complex. The raw numbers can have more
7# data in them than you'd think.
8# http://arstechnica.com/civis/viewtopic.php?p=22062211
9
10# Formatting done via shfmt -i 2
11# https://github.com/mvdan/sh
12
# awk program that turns one row of the SMART attribute table into four raw
# samples (value, worst, threshold, raw_value) keyed by attribute name and
# SMART id; "labels" is injected by the caller via `awk -v`.
parse_smartctl_attributes_awk="$(
  cat <<'SMARTCTLAWK'
$1 ~ /^ *[0-9]+$/ && $2 ~ /^[a-zA-Z0-9_-]+$/ {
  gsub(/-/, "_");
  printf "%s_value{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $4
  printf "%s_worst{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $5
  printf "%s_threshold{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $6
  printf "%s_raw_value{%s,smart_id=\"%s\"} %e\n", $2, labels, $1, $10
}
SMARTCTLAWK
)"

# Whitelist of SMART attribute names (lower-case) that will be exported.
smartmon_attrs="$(
  cat <<'SMARTMONATTRS'
airflow_temperature_cel
command_timeout
current_pending_sector
end_to_end_error
erase_fail_count
g_sense_error_rate
hardware_ecc_recovered
host_reads_mib
host_reads_32mib
host_writes_mib
host_writes_32mib
load_cycle_count
media_wearout_indicator
wear_leveling_count
nand_writes_1gib
offline_uncorrectable
power_cycle_count
power_on_hours
program_fail_count
raw_read_error_rate
reallocated_event_count
reallocated_sector_ct
reported_uncorrect
sata_downshift_count
seek_error_rate
spin_retry_count
spin_up_time
start_stop_count
temperature_case
temperature_celsius
temperature_internal
total_lbas_read
total_lbas_written
udma_crc_error_count
unsafe_shutdown_count
workld_host_reads_perc
workld_media_wear_indic
workload_minutes
SMARTMONATTRS
)"
# Collapse the whitelist into a single alternation pattern for grep -E.
smartmon_attrs="$(echo ${smartmon_attrs} | xargs | tr ' ' '|')"
68
# Parse `smartctl -A` output (on stdin) into raw metric lines, restricted
# to the whitelisted attribute names.
parse_smartctl_attributes() {
  local disk="$1"
  local disk_type="$2"
  local labels="disk=\"${disk}\",type=\"${disk_type}\""
  # NOTE: the unused local "vars" (a dead duplicate of the global
  # smartmon_attrs pattern) has been removed.
  sed 's/^ \+//g' |
    awk -v labels="${labels}" "${parse_smartctl_attributes_awk}" 2>/dev/null |
    tr A-Z a-z |
    grep -E "(${smartmon_attrs})"
}
79
# Parse `smartctl -A` output (on stdin) for SCSI/megaraid devices, which is
# printed as free-form "key: value" lines rather than an attribute table.
parse_smartctl_scsi_attributes() {
  local disk="$1"
  local disk_type="$2"
  local labels="disk=\"${disk}\",type=\"${disk_type}\""
  # BUG FIX: declare the accumulators local and empty; previously they were
  # global and unset, so a value from one disk could leak into the output
  # of the next disk when the attribute was missing there.
  local power_on='' temp_cel='' lbas_read='' lbas_written='' power_cycle='' grown_defects=''
  # BUG FIX: read -r keeps backslashes in smartctl output intact.
  while read -r line; do
    attr_type="$(echo "${line}" | tr '=' ':' | cut -f1 -d: | sed 's/^ \+//g' | tr ' ' '_')"
    attr_value="$(echo "${line}" | tr '=' ':' | cut -f2 -d: | sed 's/^ \+//g')"
    case "${attr_type}" in
    number_of_hours_powered_up_) power_on="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;;
    Current_Drive_Temperature) temp_cel="$(echo ${attr_value} | cut -f1 -d' ' | awk '{ printf "%e\n", $1 }')" ;;
    Blocks_sent_to_initiator_) lbas_read="$(echo ${attr_value} | awk '{ printf "%e\n", $1 }')" ;;
    Blocks_received_from_initiator_) lbas_written="$(echo ${attr_value} | awk '{ printf "%e\n", $1 }')" ;;
    Accumulated_start-stop_cycles) power_cycle="$(echo ${attr_value} | awk '{ printf "%e\n", $1 }')" ;;
    Elements_in_grown_defect_list) grown_defects="$(echo ${attr_value} | awk '{ printf "%e\n", $1 }')" ;;
    esac
  done
  [ ! -z "$power_on" ] && echo "power_on_hours_raw_value{${labels},smart_id=\"9\"} ${power_on}"
  [ ! -z "$temp_cel" ] && echo "temperature_celsius_raw_value{${labels},smart_id=\"194\"} ${temp_cel}"
  [ ! -z "$lbas_read" ] && echo "total_lbas_read_raw_value{${labels},smart_id=\"242\"} ${lbas_read}"
  [ ! -z "$lbas_written" ] && echo "total_lbas_written_raw_value{${labels},smart_id=\"242\"} ${lbas_written}"
  [ ! -z "$power_cycle" ] && echo "power_cycle_count_raw_value{${labels},smart_id=\"12\"} ${power_cycle}"
  # NOTE(review): smart_id "12" here duplicates power_cycle_count's id —
  # looks like a copy-paste; confirm the intended SMART id before changing
  # the emitted label value.
  [ ! -z "$grown_defects" ] && echo "grown_defects_count_raw_value{${labels},smart_id=\"12\"} ${grown_defects}"
}
103
# Parse `smartctl -i -H` output (on stdin) and emit the device_info sample
# plus the SMART availability/enabled/health gauges for one disk.
parse_smartctl_info() {
  local -i smart_available=0 smart_enabled=0 smart_healthy=0
  local disk="$1" disk_type="$2"
  local model_family='' device_model='' serial_number='' fw_version='' vendor='' product='' revision='' lun_id=''
  # BUG FIX: read -r keeps backslashes in smartctl output intact.
  while read -r line; do
    info_type="$(echo "${line}" | cut -f1 -d: | tr ' ' '_')"
    info_value="$(echo "${line}" | cut -f2- -d: | sed 's/^ \+//g' | sed 's/"/\\"/')"
    case "${info_type}" in
    Model_Family) model_family="${info_value}" ;;
    Device_Model) device_model="${info_value}" ;;
    Serial_Number) serial_number="${info_value}" ;;
    Firmware_Version) fw_version="${info_value}" ;;
    Vendor) vendor="${info_value}" ;;
    Product) product="${info_value}" ;;
    Revision) revision="${info_value}" ;;
    Logical_Unit_id) lun_id="${info_value}" ;;
    esac
    # Only the leading characters are compared; smartctl appends prose
    # after the keyword (e.g. "Available - device has SMART capability.").
    if [[ "${info_type}" == 'SMART_support_is' ]]; then
      case "${info_value:0:7}" in
      Enabled) smart_enabled=1 ;;
      Availab) smart_available=1 ;;
      Unavail) smart_available=0 ;;
      esac
    fi
    # ATA devices report PASSED, SCSI devices report a health status of OK.
    if [[ "${info_type}" == 'SMART_overall-health_self-assessment_test_result' ]]; then
      case "${info_value:0:6}" in
      PASSED) smart_healthy=1 ;;
      esac
    elif [[ "${info_type}" == 'SMART_Health_Status' ]]; then
      case "${info_value:0:2}" in
      OK) smart_healthy=1 ;;
      esac
    fi
  done
  echo "device_info{disk=\"${disk}\",type=\"${disk_type}\",vendor=\"${vendor}\",product=\"${product}\",revision=\"${revision}\",lun_id=\"${lun_id}\",model_family=\"${model_family}\",device_model=\"${device_model}\",serial_number=\"${serial_number}\",firmware_version=\"${fw_version}\"} 1"
  echo "device_smart_available{disk=\"${disk}\",type=\"${disk_type}\"} ${smart_available}"
  echo "device_smart_enabled{disk=\"${disk}\",type=\"${disk_type}\"} ${smart_enabled}"
  echo "device_smart_healthy{disk=\"${disk}\",type=\"${disk_type}\"} ${smart_healthy}"
}
143
# awk program that prefixes every sample with "smartmon_" and emits a
# HELP/TYPE header the first time each metric family (the part before '{')
# is seen. The input must be sorted so families are contiguous.
output_format_awk="$(
  cat <<'OUTPUTAWK'
BEGIN { v = "" }
v != $1 {
  print "# HELP smartmon_" $1 " SMART metric " $1;
  print "# TYPE smartmon_" $1 " gauge";
  v = $1
}
{print "smartmon_" $0}
OUTPUTAWK
)"

# Sort raw metric lines and decorate them via the awk program above.
format_output() {
  sort |
    awk -F'{' "${output_format_awk}"
}
160
smartctl_version="$(/usr/sbin/smartctl -V | head -n1 | awk '$1 == "smartctl" {print $2}')"

echo "smartctl_version{version=\"${smartctl_version}\"} 1" | format_output

# The parsing below relies on smartctl >= 6 output formats.
if [[ "$(expr "${smartctl_version}" : '\([0-9]*\)\..*')" -lt 6 ]]; then
  exit
fi

# One "device|type" entry per line, e.g. "/dev/sda|sat".
device_list="$(/usr/sbin/smartctl --scan-open | awk '/^\/dev/{print $1 "|" $3}')"

for device in ${device_list}; do
  disk="$(echo ${device} | cut -f1 -d'|')"
  type="$(echo ${device} | cut -f2 -d'|')"
  active=1
  echo "smartctl_run{disk=\"${disk}\",type=\"${type}\"}" "$(TZ=UTC date '+%s')"
  # Check if the device is in a low-power mode
  /usr/sbin/smartctl -n standby -d "${type}" "${disk}" > /dev/null || active=0
  echo "device_active{disk=\"${disk}\",type=\"${type}\"}" "${active}"
  # Skip further metrics to prevent the disk from spinning up
  test ${active} -eq 0 && continue
  # Get the SMART information and health
  /usr/sbin/smartctl -i -H -d "${type}" "${disk}" | parse_smartctl_info "${disk}" "${type}"
  # Get the SMART attributes
  case ${type} in
  sat) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_attributes "${disk}" "${type}" ;;
  sat+megaraid*) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_attributes "${disk}" "${type}" ;;
  scsi) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" ;;
  megaraid*) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" ;;
  *)
    # BUG FIX: write the diagnostic to stderr; previously it flowed down
    # the pipe into format_output and was emitted as a bogus
    # "smartmon_disk ..." sample in the metrics output.
    echo "disk type is not sat, scsi or megaraid but ${type}" >&2
    exit
    ;;
  esac
done | format_output
diff --git a/text_collector_examples/storcli.py b/text_collector_examples/storcli.py
deleted file mode 100755
index 7dc6f95..0000000
--- a/text_collector_examples/storcli.py
+++ /dev/null
@@ -1,242 +0,0 @@
1#!/usr/bin/env python3
2"""
3Script to parse StorCLI's JSON output and expose
4MegaRAID health as Prometheus metrics.
5
6Tested against StorCLI 'Ver 1.14.12 Nov 25, 2014'.
7
8StorCLI reference manual:
9http://docs.avagotech.com/docs/12352476
10
11Advanced Software Options (ASO) not exposed as metrics currently.
12
13JSON key abbreviations used by StorCLI are documented in the standard command
14output, i.e. when you omit the trailing 'J' from the command.
15
16Formatting done with YAPF:
17$ yapf -i --style '{COLUMN_LIMIT: 99}' storcli.py
18"""
19
20from __future__ import print_function
21from datetime import datetime
22import argparse
23import collections
24import json
25import os
26import shlex
27import subprocess
28
# Help text for the CLI argument parser.
DESCRIPTION = """Parses StorCLI's JSON output and exposes MegaRAID health as
    Prometheus metrics."""
VERSION = '0.0.3'

# Path to the storcli binary; filled in from --storcli_path by main().
storcli_path = ''
# Prefix prepended to every exported metric name.
metric_prefix = 'megaraid_'
# Maps metric name -> list of {'labels': str, 'value': float} measurements.
# BUG FIX: removed the dead "metric_list = {}" assignment that was
# immediately overwritten by the defaultdict below.
metric_list = collections.defaultdict(list)
37
38
def main(args):
    """Query storcli for all controllers and print the resulting metrics."""
    global storcli_path
    storcli_path = args.storcli_path
    data = get_storcli_json('/cALL show all J')

    try:
        # All the information is collected underneath the Controllers key.
        for controller in data['Controllers']:
            response = controller['Response Data']

            handle_common_controller(response)
            driver_name = response['Version']['Driver Name']
            if driver_name == 'megaraid_sas':
                handle_megaraid_controller(response)
            elif driver_name == 'mpt3sas':
                handle_sas_controller(response)
    except KeyError:
        # Unexpected document shape: emit whatever was collected so far.
        pass

    print_all_metrics(metric_list)
61
def handle_common_controller(response):
    """Emit metrics shared by every controller type (the ROC temperature)."""
    (controller_index, baselabel) = get_basic_controller_info(response)

    hwcfg = response['HwCfg']
    # The key is assembled from two parts to avoid triggering CodeSpell on
    # the vendor's misspelling; normalize it to the correct spelling.
    legacy_key = 'ROC temperature(Degree Celc' + 'ius)'
    if legacy_key in hwcfg.keys():
        hwcfg['ROC temperature(Degree Celsius)'] = hwcfg.pop(legacy_key)
    add_metric('temperature', baselabel, int(hwcfg['ROC temperature(Degree Celsius)']))
69
def handle_sas_controller(response):
    """Export health/port/drive metrics for an mpt3sas (HBA) controller."""
    (controller_index, baselabel) = get_basic_controller_info(response)
    status_is_ok = response['Status']['Controller Status'] == 'OK'
    add_metric('healthy', baselabel, int(status_is_ok))
    add_metric('ports', baselabel, response['HwCfg']['Backend Port Count'])
    try:
        # The number of physical disks is half of the number of items in
        # this dict: every disk is listed twice - once for basic info,
        # again for detailed info.
        drive_count = len(response['Physical Device Information'].keys()) / 2
        add_metric('physical_drives', baselabel, drive_count)
    except AttributeError:
        pass

    for key, basic_disk_info in response['Physical Device Information'].items():
        # Skip the "... Detailed Information" companion entries here; they
        # are looked up by create_metrics_of_physical_drive itself.
        if 'Detailed Information' in key:
            continue
        create_metrics_of_physical_drive(basic_disk_info[0],
                                         response['Physical Device Information'], controller_index)
87
88
def handle_megaraid_controller(response):
    """Export health, BBU, VD and PD metrics for a megaraid_sas controller.

    Args:
        response: (dict) The controller's "Response Data" document.
    """
    (controller_index, baselabel) = get_basic_controller_info(response)

    # BBU Status Optimal value is 0 for cachevault and 32 for BBU
    add_metric('battery_backup_healthy', baselabel,
               int(response['Status']['BBU Status'] in [0, 32]))
    add_metric('degraded', baselabel, int(response['Status']['Controller Status'] == 'Degraded'))
    add_metric('failed', baselabel, int(response['Status']['Controller Status'] == 'Failed'))
    add_metric('healthy', baselabel, int(response['Status']['Controller Status'] == 'Optimal'))
    add_metric('ports', baselabel, response['HwCfg']['Backend Port Count'])
    add_metric('scheduled_patrol_read', baselabel,
               int('hrs' in response['Scheduled Tasks']['Patrol Read Reoccurrence']))
    # One temperature sample per cachevault unit, indexed by cvidx.
    for cvidx, cvinfo in enumerate(response['Cachevault_Info']):
        add_metric('cv_temperature', baselabel + ',cvidx="' + str(cvidx) + '"', int(cvinfo['Temp'].replace('C','')))

    # Clock drift between host and controller.
    # NOTE(review): .seconds discards whole days of drift; consider
    # total_seconds() if day-scale drift should be visible.
    time_difference_seconds = -1
    system_time = datetime.strptime(response['Basics'].get('Current System Date/time'),
                                    "%m/%d/%Y, %H:%M:%S")
    controller_time = datetime.strptime(response['Basics'].get('Current Controller Date/Time'),
                                        "%m/%d/%Y, %H:%M:%S")
    if system_time and controller_time:
        time_difference_seconds = abs(system_time - controller_time).seconds
        add_metric('time_difference', baselabel, time_difference_seconds)

    # Make sure it doesn't crash if it's a JBOD setup
    if 'Drive Groups' in response.keys():
        add_metric('drive_groups', baselabel, response['Drive Groups'])
        add_metric('virtual_drives', baselabel, response['Virtual Drives'])

        # "DG/VD" is "<drive group>/<volume group>"; -1/-1 when absent.
        for virtual_drive in response['VD LIST']:
            vd_position = virtual_drive.get('DG/VD')
            drive_group, volume_group = -1, -1
            if vd_position:
                drive_group = vd_position.split('/')[0]
                volume_group = vd_position.split('/')[1]
            vd_baselabel = 'controller="{0}",DG="{1}",VG="{2}"'.format(controller_index, drive_group,
                                                                      volume_group)
            vd_info_label = vd_baselabel + ',name="{0}",cache="{1}",type="{2}",state="{3}"'.format(
                str(virtual_drive.get('Name')).strip(),
                str(virtual_drive.get('Cache')).strip(),
                str(virtual_drive.get('TYPE')).strip(),
                str(virtual_drive.get('State')).strip())
            add_metric('vd_info', vd_info_label, 1)

    add_metric('physical_drives', baselabel, response['Physical Drives'])
    # Fetch the detailed per-drive document only when drives exist.
    if response['Physical Drives'] > 0:
        data = get_storcli_json('/cALL/eALL/sALL show all J')
        drive_info = data['Controllers'][controller_index]['Response Data']
        for physical_drive in response['PD LIST']:
            create_metrics_of_physical_drive(physical_drive, drive_info, controller_index)
139
140
def get_basic_controller_info(response):
    """Register the controller_info metric and return (index, base label)."""
    controller_index = response['Basics']['Controller']
    baselabel = 'controller="{0}"'.format(controller_index)

    model = str(response['Basics']['Model']).strip()
    serial = str(response['Basics']['Serial Number']).strip()
    firmware = str(response['Version']['Firmware Version']).strip()
    controller_info_label = baselabel + ',model="{0}",serial="{1}",fwversion="{2}"'.format(
        model,
        serial,
        firmware,
    )
    add_metric('controller_info', controller_info_label, 1)

    return (controller_index, baselabel)
153
154
def create_metrics_of_physical_drive(physical_drive, detailed_info_array, controller_index):
    """Register metrics for a single physical drive.

    Args:
        physical_drive: (dict) Basic drive record (one PD LIST entry).
        detailed_info_array: (dict) Document containing the per-drive
            "... - Detailed Information" sections.
        controller_index: Controller number used in metric labels.
    """
    # "EID:Slt" is "<enclosure>:<slot>"; the enclosure may be blank (' ').
    enclosure = physical_drive.get('EID:Slt').split(':')[0]
    slot = physical_drive.get('EID:Slt').split(':')[1]

    pd_baselabel = 'controller="{0}",enclosure="{1}",slot="{2}"'.format(controller_index, enclosure,
                                                                       slot)
    pd_info_label = pd_baselabel + \
        ',disk_id="{0}",interface="{1}",media="{2}",model="{3}",DG="{4}",state="{5}"'.format(
            str(physical_drive.get('DID')).strip(),
            str(physical_drive.get('Intf')).strip(),
            str(physical_drive.get('Med')).strip(),
            str(physical_drive.get('Model')).strip(),
            str(physical_drive.get('DG')).strip(),
            str(physical_drive.get('State')).strip())

    # Detailed sections are named "Drive /cX/eY/sZ"; drives without an
    # enclosure are addressed as "Drive /cX/sZ" instead.
    drive_identifier = 'Drive /c' + str(controller_index) + '/e' + str(enclosure) + '/s' + str(
        slot)
    if enclosure == ' ':
        drive_identifier = 'Drive /c' + str(controller_index) + '/s' + str(slot)
    try:
        info = detailed_info_array[drive_identifier + ' - Detailed Information']
        state = info[drive_identifier + ' State']
        attributes = info[drive_identifier + ' Device attributes']
        settings = info[drive_identifier + ' Policies/Settings']

        add_metric('pd_shield_counter', pd_baselabel, state['Shield Counter'])
        add_metric('pd_media_errors', pd_baselabel, state['Media Error Count'])
        add_metric('pd_other_errors', pd_baselabel, state['Other Error Count'])
        add_metric('pd_predictive_errors', pd_baselabel, state['Predictive Failure Count'])
        add_metric('pd_smart_alerted', pd_baselabel,
                   int(state['S.M.A.R.T alert flagged by drive'] == 'Yes'))
        add_metric('pd_link_speed_gbps', pd_baselabel, attributes['Link Speed'].split('.')[0])
        add_metric('pd_device_speed_gbps', pd_baselabel, attributes['Device Speed'].split('.')[0])
        add_metric('pd_commissioned_spare', pd_baselabel,
                   int(settings['Commissioned Spare'] == 'Yes'))
        add_metric('pd_emergency_spare', pd_baselabel, int(settings['Emergency Spare'] == 'Yes'))
        pd_info_label += ',firmware="{0}"'.format(attributes['Firmware Revision'].strip())
    except KeyError:
        # Detailed info can be missing for a drive; still emit pd_info below.
        pass
    add_metric('pd_info', pd_info_label, 1)
195
196
def add_metric(name, labels, value):
    """Record one measurement under *name*; non-numeric values are dropped."""
    global metric_list
    try:
        numeric_value = float(value)
    except ValueError:
        return
    metric_list[name].append({
        'labels': labels,
        'value': numeric_value,
    })
206
207
def print_all_metrics(metrics):
    """Print HELP/TYPE headers plus all samples for every collected metric."""
    for metric, measurements in metrics.items():
        help_text = metric.replace('_', ' ')
        print('# HELP {0}{1} MegaRAID {2}'.format(metric_prefix, metric, help_text))
        print('# TYPE {0}{1} gauge'.format(metric_prefix, metric))
        for measurement in measurements:
            # Values are floats by construction (see add_metric), so this
            # guard is effectively always true; kept to preserve behaviour.
            if measurement['value'] != 'Unknown':
                label_part = '{' + measurement['labels'] + '}'
                print('{0}{1}{2} {3}'.format(metric_prefix, metric, label_part,
                                             measurement['value']))
216
217
def get_storcli_json(storcli_args):
    """Get storcli output in JSON format.

    Args:
        storcli_args: (str) Arguments appended to the storcli invocation.

    Returns:
        (dict) Parsed JSON document produced by storcli.

    Raises:
        SystemExit: If storcli is missing/not executable or reports failure.
    """
    # Check if storcli is installed and executable.
    # BUG FIX: SystemExit(1) was previously only instantiated, never raised,
    # so both error checks here were silent no-ops.
    if not (os.path.isfile(storcli_path) and os.access(storcli_path, os.X_OK)):
        raise SystemExit(1)

    storcli_cmd = shlex.split(storcli_path + ' ' + storcli_args)
    proc = subprocess.Popen(
        storcli_cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output_json = proc.communicate()[0]
    data = json.loads(output_json.decode("utf-8"))

    if data["Controllers"][0]["Command Status"]["Status"] != "Success":
        raise SystemExit(1)
    return data
232
233
234if __name__ == "__main__":
235 PARSER = argparse.ArgumentParser(
236 description=DESCRIPTION, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
237 PARSER.add_argument(
238 '--storcli_path', default='/opt/MegaRAID/storcli/storcli64', help='path to StorCLi binary')
239 PARSER.add_argument('--version', action='version', version='%(prog)s {0}'.format(VERSION))
240 ARGS = PARSER.parse_args()
241
242 main(ARGS)
diff --git a/text_collector_examples/yum.sh b/text_collector_examples/yum.sh
deleted file mode 100755
index d0034ee..0000000
--- a/text_collector_examples/yum.sh
+++ /dev/null
@@ -1,18 +0,0 @@
#!/bin/bash
#
# Description: Expose metrics from yum updates.
#
# Author: Slawomir Gonet <slawek@otwiera.cz>
#
# Based on apt.sh by Ben Kochie <superq@gmail.com>

# Count pending upgrades grouped by origin repository (third yum column).
# The awk "mute" filter drops everything from "Obsoleting Packages" onward.
# BUG FIX: use "grep -E" instead of the deprecated "egrep" wrapper, which
# emits an "egrep is obsolescent" warning on current GNU grep releases.
# NOTE(review): yum's documented subcommand is "check-update" (singular);
# verify that "check-updates" resolves on the target distribution.
upgrades=$(/usr/bin/yum -q check-updates | awk 'BEGIN { mute=1 } /Obsoleting Packages/ { mute=0 } mute { print }' | grep -E '^\w+\.\w+' | awk '{print $3}' | sort | uniq -c | awk '{print "yum_upgrades_pending{origin=\""$2"\"} "$1}')

echo '# HELP yum_upgrades_pending Yum package pending updates by origin.'
echo '# TYPE yum_upgrades_pending gauge'
if [[ -n "${upgrades}" ]] ; then
  echo "${upgrades}"
else
  echo 'yum_upgrades_pending{origin=""} 0'
fi