diff options
author | Christopher Blum <zeichenanonym@web.de> | 2018-09-18 22:43:20 +0200 |
---|---|---|
committer | Ben Kochie <superq@gmail.com> | 2018-09-18 22:43:20 +0200 |
commit | 6aa5cfba6c42ea6a9bd83787b2e595984d91f8d6 (patch) | |
tree | e5016785d168bf727717b358eb47294c43d8e07d | |
parent | 1c9ea46ccab252e217971eebd5da6e492c108ea2 (diff) | |
download | prometheus_node_collector-6aa5cfba6c42ea6a9bd83787b2e595984d91f8d6.tar.bz2 prometheus_node_collector-6aa5cfba6c42ea6a9bd83787b2e595984d91f8d6.tar.xz prometheus_node_collector-6aa5cfba6c42ea6a9bd83787b2e595984d91f8d6.zip |
textfile example script rework (#1074)
* textfile smartmon.sh
Added support for parsing MegaRAID-attached disks (the sat+megaraid and megaraid device types).
Added parsing of the grown defect list counter (grown_defects).
* textfile storcli.py
Reworked the example file to export much more information about
MegaRAID-attached controllers, virtual drives (VDs) and physical drives (PDs).
Signed-off-by: Christopher Blum <christopher.blum@profitbricks.com>
-rwxr-xr-x | text_collector_examples/smartmon.sh | 111 | ||||
-rwxr-xr-x | text_collector_examples/storcli.py | 259 |
2 files changed, 227 insertions, 143 deletions
diff --git a/text_collector_examples/smartmon.sh b/text_collector_examples/smartmon.sh index 9b0c7d4..7b873fa 100755 --- a/text_collector_examples/smartmon.sh +++ b/text_collector_examples/smartmon.sh | |||
@@ -7,7 +7,11 @@ | |||
7 | # data in them than you'd think. | 7 | # data in them than you'd think. |
8 | # http://arstechnica.com/civis/viewtopic.php?p=22062211 | 8 | # http://arstechnica.com/civis/viewtopic.php?p=22062211 |
9 | 9 | ||
10 | parse_smartctl_attributes_awk="$(cat << 'SMARTCTLAWK' | 10 | # Formatting done via shfmt -i 2 |
11 | # https://github.com/mvdan/sh | ||
12 | |||
13 | parse_smartctl_attributes_awk="$( | ||
14 | cat <<'SMARTCTLAWK' | ||
11 | $1 ~ /^ *[0-9]+$/ && $2 ~ /^[a-zA-Z0-9_-]+$/ { | 15 | $1 ~ /^ *[0-9]+$/ && $2 ~ /^[a-zA-Z0-9_-]+$/ { |
12 | gsub(/-/, "_"); | 16 | gsub(/-/, "_"); |
13 | printf "%s_value{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $4 | 17 | printf "%s_value{%s,smart_id=\"%s\"} %d\n", $2, labels, $1, $4 |
@@ -18,7 +22,8 @@ $1 ~ /^ *[0-9]+$/ && $2 ~ /^[a-zA-Z0-9_-]+$/ { | |||
18 | SMARTCTLAWK | 22 | SMARTCTLAWK |
19 | )" | 23 | )" |
20 | 24 | ||
21 | smartmon_attrs="$(cat << 'SMARTMONATTRS' | 25 | smartmon_attrs="$( |
26 | cat <<'SMARTMONATTRS' | ||
22 | airflow_temperature_cel | 27 | airflow_temperature_cel |
23 | command_timeout | 28 | command_timeout |
24 | current_pending_sector | 29 | current_pending_sector |
@@ -64,63 +69,65 @@ parse_smartctl_attributes() { | |||
64 | local disk_type="$2" | 69 | local disk_type="$2" |
65 | local labels="disk=\"${disk}\",type=\"${disk_type}\"" | 70 | local labels="disk=\"${disk}\",type=\"${disk_type}\"" |
66 | local vars="$(echo "${smartmon_attrs}" | xargs | tr ' ' '|')" | 71 | local vars="$(echo "${smartmon_attrs}" | xargs | tr ' ' '|')" |
67 | sed 's/^ \+//g' \ | 72 | sed 's/^ \+//g' | |
68 | | awk -v labels="${labels}" "${parse_smartctl_attributes_awk}" 2>/dev/null \ | 73 | awk -v labels="${labels}" "${parse_smartctl_attributes_awk}" 2>/dev/null | |
69 | | tr A-Z a-z \ | 74 | tr A-Z a-z | |
70 | | grep -E "(${smartmon_attrs})" | 75 | grep -E "(${smartmon_attrs})" |
71 | } | 76 | } |
72 | 77 | ||
73 | parse_smartctl_scsi_attributes() { | 78 | parse_smartctl_scsi_attributes() { |
74 | local disk="$1" | 79 | local disk="$1" |
75 | local disk_type="$2" | 80 | local disk_type="$2" |
76 | local labels="disk=\"${disk}\",type=\"${disk_type}\"" | 81 | local labels="disk=\"${disk}\",type=\"${disk_type}\"" |
77 | while read line ; do | 82 | while read line; do |
78 | attr_type="$(echo "${line}" | tr '=' ':' | cut -f1 -d: | sed 's/^ \+//g' | tr ' ' '_')" | 83 | attr_type="$(echo "${line}" | tr '=' ':' | cut -f1 -d: | sed 's/^ \+//g' | tr ' ' '_')" |
79 | attr_value="$(echo "${line}" | tr '=' ':' | cut -f2 -d: | sed 's/^ \+//g')" | 84 | attr_value="$(echo "${line}" | tr '=' ':' | cut -f2 -d: | sed 's/^ \+//g')" |
80 | case "${attr_type}" in | 85 | case "${attr_type}" in |
81 | number_of_hours_powered_up_) power_on="$( echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;; | 86 | number_of_hours_powered_up_) power_on="$(echo "${attr_value}" | awk '{ printf "%e\n", $1 }')" ;; |
82 | Current_Drive_Temperature) temp_cel="$(echo ${attr_value} | cut -f1 -d' ' | awk '{ printf "%e\n", $1 }')" ;; | 87 | Current_Drive_Temperature) temp_cel="$(echo ${attr_value} | cut -f1 -d' ' | awk '{ printf "%e\n", $1 }')" ;; |
83 | Blocks_read_from_cache_and_sent_to_initiator_) lbas_read="$(echo ${attr_value} | awk '{ printf "%e\n", $1 }')" ;; | 88 | Blocks_read_from_cache_and_sent_to_initiator_) lbas_read="$(echo ${attr_value} | awk '{ printf "%e\n", $1 }')" ;; |
84 | Accumulated_start-stop_cycles) power_cycle="$(echo ${attr_value} | awk '{ printf "%e\n", $1 }')" ;; | 89 | Accumulated_start-stop_cycles) power_cycle="$(echo ${attr_value} | awk '{ printf "%e\n", $1 }')" ;; |
85 | esac | 90 | Elements_in_grown_defect_list) grown_defects="$(echo ${attr_value} | awk '{ printf "%e\n", $1 }')" ;; |
86 | done | 91 | esac |
87 | echo "power_on_hours_raw_value{"${labels}",smart_id=\"9\"} ${power_on}" | 92 | done |
88 | echo "temperature_celsius_raw_value{"${labels}",smart_id=\"194\"} ${temp_cel}" | 93 | [ ! -z "$power_on" ] && echo "power_on_hours_raw_value{${labels},smart_id=\"9\"} ${power_on}" |
89 | echo "total_lbas_read_raw_value{"${labels}",smart_id=\"242\"} ${lbas_read}" | 94 | [ ! -z "$temp_cel" ] && echo "temperature_celsius_raw_value{${labels},smart_id=\"194\"} ${temp_cel}" |
90 | echo "power_cycle_count_raw_value{"${labels}",smart_id=\"12\"} ${power_cycle}" | 95 | [ ! -z "$lbas_read" ] && echo "total_lbas_read_raw_value{${labels},smart_id=\"242\"} ${lbas_read}" |
96 | [ ! -z "$power_cycle" ] && echo "power_cycle_count_raw_value{${labels},smart_id=\"12\"} ${power_cycle}" | ||
97 | [ ! -z "$grown_defects" ] && echo "grown_defects_count_raw_value{${labels},smart_id=\"12\"} ${grown_defects}" | ||
91 | } | 98 | } |
92 | 99 | ||
93 | parse_smartctl_info() { | 100 | parse_smartctl_info() { |
94 | local -i smart_available=0 smart_enabled=0 smart_healthy=0 | 101 | local -i smart_available=0 smart_enabled=0 smart_healthy=0 |
95 | local disk="$1" disk_type="$2" | 102 | local disk="$1" disk_type="$2" |
96 | local model_family='' device_model='' serial_number='' fw_version='' vendor='' product='' revision='' lun_id='' | 103 | local model_family='' device_model='' serial_number='' fw_version='' vendor='' product='' revision='' lun_id='' |
97 | while read line ; do | 104 | while read line; do |
98 | info_type="$(echo "${line}" | cut -f1 -d: | tr ' ' '_')" | 105 | info_type="$(echo "${line}" | cut -f1 -d: | tr ' ' '_')" |
99 | info_value="$(echo "${line}" | cut -f2- -d: | sed 's/^ \+//g' | sed 's/"/\\"/')" | 106 | info_value="$(echo "${line}" | cut -f2- -d: | sed 's/^ \+//g' | sed 's/"/\\"/')" |
100 | case "${info_type}" in | 107 | case "${info_type}" in |
101 | Model_Family) model_family="${info_value}" ;; | 108 | Model_Family) model_family="${info_value}" ;; |
102 | Device_Model) device_model="${info_value}" ;; | 109 | Device_Model) device_model="${info_value}" ;; |
103 | Serial_Number) serial_number="${info_value}" ;; | 110 | Serial_Number) serial_number="${info_value}" ;; |
104 | Firmware_Version) fw_version="${info_value}" ;; | 111 | Firmware_Version) fw_version="${info_value}" ;; |
105 | Vendor) vendor="${info_value}" ;; | 112 | Vendor) vendor="${info_value}" ;; |
106 | Product) product="${info_value}" ;; | 113 | Product) product="${info_value}" ;; |
107 | Revision) revision="${info_value}" ;; | 114 | Revision) revision="${info_value}" ;; |
108 | Logical_Unit_id) lun_id="${info_value}" ;; | 115 | Logical_Unit_id) lun_id="${info_value}" ;; |
109 | esac | 116 | esac |
110 | if [[ "${info_type}" == 'SMART_support_is' ]] ; then | 117 | if [[ "${info_type}" == 'SMART_support_is' ]]; then |
111 | case "${info_value:0:7}" in | 118 | case "${info_value:0:7}" in |
112 | Enabled) smart_enabled=1 ;; | 119 | Enabled) smart_enabled=1 ;; |
113 | Availab) smart_available=1 ;; | 120 | Availab) smart_available=1 ;; |
114 | Unavail) smart_available=0 ;; | 121 | Unavail) smart_available=0 ;; |
115 | esac | 122 | esac |
116 | fi | 123 | fi |
117 | if [[ "${info_type}" == 'SMART_overall-health_self-assessment_test_result' ]] ; then | 124 | if [[ "${info_type}" == 'SMART_overall-health_self-assessment_test_result' ]]; then |
118 | case "${info_value:0:6}" in | 125 | case "${info_value:0:6}" in |
119 | PASSED) smart_healthy=1 ;; | 126 | PASSED) smart_healthy=1 ;; |
120 | esac | 127 | esac |
121 | elif [[ "${info_type}" == 'SMART_Health_Status' ]] ; then | 128 | elif [[ "${info_type}" == 'SMART_Health_Status' ]]; then |
122 | case "${info_value:0:2}" in | 129 | case "${info_value:0:2}" in |
123 | OK) smart_healthy=1 ;; | 130 | OK) smart_healthy=1 ;; |
124 | esac | 131 | esac |
125 | fi | 132 | fi |
126 | done | 133 | done |
@@ -130,7 +137,8 @@ parse_smartctl_info() { | |||
130 | echo "device_smart_healthy{disk=\"${disk}\",type=\"${disk_type}\"} ${smart_healthy}" | 137 | echo "device_smart_healthy{disk=\"${disk}\",type=\"${disk_type}\"} ${smart_healthy}" |
131 | } | 138 | } |
132 | 139 | ||
133 | output_format_awk="$(cat << 'OUTPUTAWK' | 140 | output_format_awk="$( |
141 | cat <<'OUTPUTAWK' | ||
134 | BEGIN { v = "" } | 142 | BEGIN { v = "" } |
135 | v != $1 { | 143 | v != $1 { |
136 | print "# HELP smartmon_" $1 " SMART metric " $1; | 144 | print "# HELP smartmon_" $1 " SMART metric " $1; |
@@ -142,15 +150,15 @@ OUTPUTAWK | |||
142 | )" | 150 | )" |
143 | 151 | ||
144 | format_output() { | 152 | format_output() { |
145 | sort \ | 153 | sort | |
146 | | awk -F'{' "${output_format_awk}" | 154 | awk -F'{' "${output_format_awk}" |
147 | } | 155 | } |
148 | 156 | ||
149 | smartctl_version="$(/usr/sbin/smartctl -V | head -n1 | awk '$1 == "smartctl" {print $2}')" | 157 | smartctl_version="$(/usr/sbin/smartctl -V | head -n1 | awk '$1 == "smartctl" {print $2}')" |
150 | 158 | ||
151 | echo "smartctl_version{version=\"${smartctl_version}\"} 1" | format_output | 159 | echo "smartctl_version{version=\"${smartctl_version}\"} 1" | format_output |
152 | 160 | ||
153 | if [[ "$(expr "${smartctl_version}" : '\([0-9]*\)\..*')" -lt 6 ]] ; then | 161 | if [[ "$(expr "${smartctl_version}" : '\([0-9]*\)\..*')" -lt 6 ]]; then |
154 | exit | 162 | exit |
155 | fi | 163 | fi |
156 | 164 | ||
@@ -159,13 +167,18 @@ device_list="$(/usr/sbin/smartctl --scan-open | awk '/^\/dev/{print $1 "|" $3}') | |||
159 | for device in ${device_list}; do | 167 | for device in ${device_list}; do |
160 | disk="$(echo ${device} | cut -f1 -d'|')" | 168 | disk="$(echo ${device} | cut -f1 -d'|')" |
161 | type="$(echo ${device} | cut -f2 -d'|')" | 169 | type="$(echo ${device} | cut -f2 -d'|')" |
162 | echo "smartctl_run{disk=\"${disk}\",type=\"${type}\"}" $(TZ=UTC date '+%s') | 170 | echo "smartctl_run{disk=\"${disk}\",type=\"${type}\"}" "$(TZ=UTC date '+%s')" |
163 | # Get the SMART information and health | 171 | # Get the SMART information and health |
164 | /usr/sbin/smartctl -i -H -d "${type}" "${disk}" | parse_smartctl_info "${disk}" "${type}" | 172 | /usr/sbin/smartctl -i -H -d "${type}" "${disk}" | parse_smartctl_info "${disk}" "${type}" |
165 | # Get the SMART attributes | 173 | # Get the SMART attributes |
166 | case ${type} in | 174 | case ${type} in |
167 | sat) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_attributes "${disk}" "${type}" ;; | 175 | sat) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_attributes "${disk}" "${type}" ;; |
168 | scsi) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" ;; | 176 | sat+megaraid*) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_attributes "${disk}" "${type}" ;; |
169 | *) echo "disk type is not sat or scsi, ${type}"; exit ;; | 177 | scsi) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" ;; |
178 | megaraid*) /usr/sbin/smartctl -A -d "${type}" "${disk}" | parse_smartctl_scsi_attributes "${disk}" "${type}" ;; | ||
179 | *) | ||
180 | echo "disk type is not sat, scsi or megaraid but ${type}" | ||
181 | exit | ||
182 | ;; | ||
170 | esac | 183 | esac |
171 | done | format_output | 184 | done | format_output |
diff --git a/text_collector_examples/storcli.py b/text_collector_examples/storcli.py index 3166290..48e2bba 100755 --- a/text_collector_examples/storcli.py +++ b/text_collector_examples/storcli.py | |||
@@ -1,4 +1,4 @@ | |||
1 | #!/usr/bin/env python | 1 | #!/usr/bin/env python3 |
2 | """ | 2 | """ |
3 | Script to parse StorCLI's JSON output and expose | 3 | Script to parse StorCLI's JSON output and expose |
4 | MegaRAID health as Prometheus metrics. | 4 | MegaRAID health as Prometheus metrics. |
@@ -19,110 +19,181 @@ import argparse | |||
19 | import json | 19 | import json |
20 | import os | 20 | import os |
21 | import subprocess | 21 | import subprocess |
22 | import shlex | ||
23 | from dateutil.parser import parse | ||
24 | import collections | ||
25 | from enum import IntEnum | ||
22 | 26 | ||
23 | DESCRIPTION = """Parses StorCLI's JSON output and exposes MegaRAID health as | 27 | DESCRIPTION = """Parses StorCLI's JSON output and exposes MegaRAID health as |
24 | Prometheus metrics.""" | 28 | Prometheus metrics.""" |
25 | VERSION = '0.0.1' | 29 | VERSION = '0.0.2' |
30 | |||
31 | storcli_path = '' | ||
32 | metric_prefix = 'megaraid_' | ||
33 | metric_list = {} | ||
34 | metric_list = collections.defaultdict(list) | ||
35 | |||
36 | |||
37 | class VD_State(IntEnum): | ||
38 | Optl = 0 # Optimal | ||
39 | Dgrd = 1 # Degraded | ||
40 | Pdgd = 2 # Partially Degraded | ||
41 | OfLn = 3 # Offline | ||
42 | Rec = 4 # Recovery | ||
43 | Cac = 5 # CacheCade | ||
26 | 44 | ||
27 | 45 | ||
28 | def main(args): | 46 | def main(args): |
29 | """ main """ | 47 | """ main """ |
30 | 48 | global storcli_path | |
31 | # exporter variables | 49 | storcli_path = args.storcli_path |
32 | metric_prefix = 'megaraid_' | 50 | data = json.loads(get_storcli_json('/cALL show all J')) |
33 | metric_controller_labels = '{{controller="{}", model="{}"}}' | 51 | |
34 | 52 | # All the information is collected underneath the Controllers key | |
35 | data = json.loads(get_storcli_json(args.storcli_path)) | 53 | data = data['Controllers'] |
36 | 54 | ||
37 | # It appears that the data we need will always be present in the first | 55 | # try: |
38 | # item in the Controllers array | 56 | # overview = status['Response Data']['System Overview'] |
39 | status = data['Controllers'][0] | 57 | # except KeyError: |
40 | 58 | # pass | |
41 | metrics = { | 59 | |
42 | 'status_code': status['Command Status']['Status Code'], | 60 | for controller in data: |
43 | 'controllers': status['Response Data']['Number of Controllers'], | 61 | response = controller['Response Data'] |
44 | } | 62 | if response['Version']['Driver Name'] == 'megaraid_sas': |
45 | 63 | handle_megaraid_controller(response) | |
46 | for name, value in metrics.iteritems(): | 64 | elif response['Version']['Driver Name'] == 'mpt3sas': |
47 | print('# HELP {}{} MegaRAID {}'.format(metric_prefix, name, name.replace('_', ' '))) | 65 | handle_sas_controller(response) |
48 | print('# TYPE {}{} gauge'.format(metric_prefix, name)) | 66 | |
49 | print("{}{} {}".format(metric_prefix, name, value)) | 67 | # print_dict_to_exporter({'controller_info': [1]}, controller_info_list) |
50 | 68 | # print_dict_to_exporter({'virtual_disk_info': [1]}, vd_info_list) | |
51 | controller_info = [] | 69 | # print_dict_to_exporter({'physical_disk_info': [1]}, pd_info_list) |
52 | controller_metrics = {} | 70 | # print_all_metrics(vd_metric_list) |
53 | overview = [] | 71 | print_all_metrics(metric_list) |
54 | 72 | ||
55 | try: | 73 | |
56 | overview = status['Response Data']['System Overview'] | 74 | def handle_sas_controller(response): |
57 | except KeyError: | 75 | pass |
58 | pass | 76 | |
59 | 77 | ||
60 | for controller in overview: | 78 | def handle_megaraid_controller(response): |
61 | controller_index = controller['Ctl'] | 79 | controller_index = response['Basics']['Controller'] |
62 | model = controller['Model'] | 80 | baselabel = 'controller="{}"'.format(controller_index) |
63 | controller_info.append(metric_controller_labels.format(controller_index, model)) | 81 | |
64 | 82 | controller_info_label = baselabel + ',model="{}",serial="{}",fwversion="{}"'.format( | |
65 | controller_metrics = { | 83 | response['Basics']['Model'], |
66 | # FIXME: Parse dimmer switch options | 84 | response['Basics']['Serial Number'], |
67 | # 'dimmer_switch': controller['DS'], | 85 | response['Version']['Firmware Version'], |
68 | 86 | ) | |
69 | 'battery_backup_healthy': int(controller['BBU'] == 'Opt'), | 87 | add_metric('controller_info', controller_info_label, 1) |
70 | 'degraded': int(controller['Hlth'] == 'Dgd'), | 88 | |
71 | 'drive_groups': controller['DGs'], | 89 | add_metric('battery_backup_healthy', baselabel, int(response['Status']['BBU Status'] == 0)) |
72 | 'emergency_hot_spare': int(controller['EHS'] == 'Y'), | 90 | add_metric('degraded', baselabel, int(response['Status']['Controller Status'] == 'Degraded')) |
73 | 'failed': int(controller['Hlth'] == 'Fld'), | 91 | add_metric('failed', baselabel, int(response['Status']['Controller Status'] == 'Failed')) |
74 | 'healthy': int(controller['Hlth'] == 'Opt'), | 92 | add_metric('healthy', baselabel, int(response['Status']['Controller Status'] == 'Optimal')) |
75 | 'physical_drives': controller['PDs'], | 93 | add_metric('drive_groups', baselabel, response['Drive Groups']) |
76 | 'ports': controller['Ports'], | 94 | add_metric('virtual_drives', baselabel, response['Virtual Drives']) |
77 | 'scheduled_patrol_read': int(controller['sPR'] == 'On'), | 95 | add_metric('physical_drives', baselabel, response['Physical Drives']) |
78 | 'virtual_drives': controller['VDs'], | 96 | add_metric('ports', baselabel, response['HwCfg']['Backend Port Count']) |
79 | 97 | add_metric('scheduled_patrol_read', baselabel, | |
80 | # Reverse StorCLI's logic to make metrics consistent | 98 | int('hrs' in response['Scheduled Tasks']['Patrol Read Reoccurrence'])) |
81 | 'drive_groups_optimal': int(controller['DNOpt'] == 0), | 99 | |
82 | 'virtual_drives_optimal': int(controller['VNOpt'] == 0), | 100 | time_difference_seconds = -1 |
83 | } | 101 | system_time = parse(response['Basics'].get('Current System Date/time')) |
84 | 102 | controller_time = parse(response['Basics'].get('Current Controller Date/Time')) | |
85 | for name, value in controller_metrics.iteritems(): | 103 | if system_time and controller_time: |
86 | print('# HELP {}{} MegaRAID {}'.format(metric_prefix, name, name.replace('_', ' '))) | 104 | time_difference_seconds = abs(system_time - controller_time).seconds |
87 | print('# TYPE {}{} gauge'.format(metric_prefix, name)) | 105 | add_metric('time_difference', baselabel, time_difference_seconds) |
88 | print('{}{}{{controller="{}"}} {}'.format(metric_prefix, name, | 106 | |
89 | controller_index, value)) | 107 | for virtual_drive in response['VD LIST']: |
90 | 108 | vd_position = virtual_drive.get('DG/VD') | |
91 | if controller_info: | 109 | drive_group, volume_group = -1, -1 |
92 | print('# HELP {}{} MegaRAID controller info'.format(metric_prefix, 'controller_info')) | 110 | if vd_position: |
93 | print('# TYPE {}{} gauge'.format(metric_prefix, 'controller_info')) | 111 | drive_group = vd_position.split('/')[0] |
94 | for labels in controller_info: | 112 | volume_group = vd_position.split('/')[1] |
95 | print('{}{}{} {}'.format(metric_prefix, 'controller_info', labels, 1)) | 113 | vd_baselabel = 'controller="{}",DG="{}",VG="{}"'.format(controller_index, drive_group, |
96 | 114 | volume_group) | |
97 | 115 | vd_info_label = vd_baselabel + ',name="{}",cache="{}",type="{}"'.format( | |
98 | def get_storcli_json(storcli_path): | 116 | virtual_drive.get('Name'), virtual_drive.get('Cache'), virtual_drive.get('TYPE')) |
117 | add_metric('vd_info', vd_info_label, 1) | ||
118 | add_metric('vd_status', vd_baselabel, int(VD_State[virtual_drive.get('State')])) | ||
119 | |||
120 | if response['Physical Drives'] > 0: | ||
121 | data = json.loads(get_storcli_json('/cALL/eALL/sALL show all J')) | ||
122 | drive_info = data['Controllers'][controller_index]['Response Data'] | ||
123 | for physical_drive in response['PD LIST']: | ||
124 | enclosure = physical_drive.get('EID:Slt').split(':')[0] | ||
125 | slot = physical_drive.get('EID:Slt').split(':')[1] | ||
126 | |||
127 | pd_baselabel = 'controller="{}",enclosure="{}",slot="{}"'.format( | ||
128 | controller_index, enclosure, slot) | ||
129 | pd_info_label = pd_baselabel + ',disk_id="{}",interface="{}",media="{}",model="{}"'.format( | ||
130 | physical_drive.get('DID'), physical_drive.get('Intf'), physical_drive.get('Med'), | ||
131 | physical_drive.get('Model').strip()) | ||
132 | |||
133 | drive_identifier = 'Drive /c' + str(controller_index) + '/e' + str(enclosure) + '/s' + str( | ||
134 | slot) | ||
135 | try: | ||
136 | info = drive_info[drive_identifier + ' - Detailed Information'] | ||
137 | state = info[drive_identifier + ' State'] | ||
138 | attributes = info[drive_identifier + ' Device attributes'] | ||
139 | settings = info[drive_identifier + ' Policies/Settings'] | ||
140 | |||
141 | add_metric('pd_shield_counter', pd_baselabel, state['Shield Counter']) | ||
142 | add_metric('pd_media_errors_total', pd_baselabel, state['Media Error Count']) | ||
143 | add_metric('pd_other_errors_total', pd_baselabel, state['Other Error Count']) | ||
144 | add_metric('pd_predictive_errors_total', pd_baselabel, | ||
145 | state['Predictive Failure Count']) | ||
146 | add_metric('pd_smart_alerted', pd_baselabel, | ||
147 | int(state['S.M.A.R.T alert flagged by drive'] == 'Yes')) | ||
148 | add_metric('pd_link_speed_gbps', pd_baselabel, attributes['Link Speed'].split('.')[0]) | ||
149 | add_metric('pd_device_speed_gbps', pd_baselabel, | ||
150 | attributes['Device Speed'].split('.')[0]) | ||
151 | add_metric('pd_commissioned_spare', pd_baselabel, | ||
152 | int(settings['Commissioned Spare'] == 'Yes')) | ||
153 | add_metric('pd_emergency_spare', pd_baselabel, | ||
154 | int(settings['Emergency Spare'] == 'Yes')) | ||
155 | pd_info_label += ',firmware="{}"'.format(attributes['Firmware Revision']) | ||
156 | except KeyError: | ||
157 | pass | ||
158 | add_metric('pd_info', pd_info_label, 1) | ||
159 | |||
160 | |||
161 | def add_metric(name, labels, value): | ||
162 | global metric_list | ||
163 | metric_list[name].append({ | ||
164 | 'labels': labels, | ||
165 | 'value': value, | ||
166 | }) | ||
167 | |||
168 | |||
169 | def print_all_metrics(metrics): | ||
170 | for metric, measurements in metrics.items(): | ||
171 | print('# HELP {}{} MegaRAID {}'.format(metric_prefix, metric, metric.replace('_', ' '))) | ||
172 | print('# TYPE {}{} gauge'.format(metric_prefix, metric)) | ||
173 | for measurement in measurements: | ||
174 | print('{}{}{} {}'.format(metric_prefix, metric, '{' + measurement['labels'] + '}', | ||
175 | measurement['value'])) | ||
176 | |||
177 | |||
178 | def get_storcli_json(storcli_args): | ||
99 | """Get storcli output in JSON format.""" | 179 | """Get storcli output in JSON format.""" |
180 | # Check if storcli is installed and executable | ||
181 | if not (os.path.isfile(storcli_path) and os.access(storcli_path, os.X_OK)): | ||
182 | SystemExit(1) | ||
183 | storcli_cmd = shlex.split(storcli_path + ' ' + storcli_args) | ||
184 | proc = subprocess.Popen( | ||
185 | storcli_cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||
186 | output_json = proc.communicate()[0] | ||
187 | |||
188 | return output_json.decode("utf-8") | ||
100 | 189 | ||
101 | # Check if storcli is installed | ||
102 | if os.path.isfile(storcli_path) and os.access(storcli_path, os.X_OK): | ||
103 | storcli_cmd = [storcli_path, 'show', 'all', 'J'] | ||
104 | proc = subprocess.Popen(storcli_cmd, shell=False, | ||
105 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||
106 | output_json = proc.communicate()[0] | ||
107 | else: | ||
108 | # Create an empty dummy-JSON where storcli not installed. | ||
109 | dummy_json = {"Controllers":[{ | ||
110 | "Command Status": {"Status Code": 0, "Status": "Success", | ||
111 | "Description": "None"}, | ||
112 | "Response Data": {"Number of Controllers": 0}}]} | ||
113 | output_json = json.dumps(dummy_json) | ||
114 | |||
115 | return output_json | ||
116 | 190 | ||
117 | if __name__ == "__main__": | 191 | if __name__ == "__main__": |
118 | PARSER = argparse.ArgumentParser(description=DESCRIPTION, | 192 | PARSER = argparse.ArgumentParser( |
119 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) | 193 | description=DESCRIPTION, formatter_class=argparse.ArgumentDefaultsHelpFormatter) |
120 | PARSER.add_argument('--storcli_path', | 194 | PARSER.add_argument( |
121 | default='/opt/MegaRAID/storcli/storcli64', | 195 | '--storcli_path', default='/opt/MegaRAID/storcli/storcli64', help='path to StorCLi binary') |
122 | help='path to StorCLi binary') | 196 | PARSER.add_argument('--version', action='version', version='%(prog)s {}'.format(VERSION)) |
123 | PARSER.add_argument('--version', | ||
124 | action='version', | ||
125 | version='%(prog)s {}'.format(VERSION)) | ||
126 | ARGS = PARSER.parse_args() | 197 | ARGS = PARSER.parse_args() |
127 | 198 | ||
128 | main(ARGS) | 199 | main(ARGS) |