diff options
author | Christopher Blum <zeichenanonym@web.de> | 2018-11-07 17:12:23 +0100 |
---|---|---|
committer | Ben Kochie <superq@gmail.com> | 2018-11-07 17:12:23 +0100 |
commit | 1b98db9fa72abe93541fb1a7140388504601e303 (patch) | |
tree | b4d8fc9fbe1c324ff6e51d498adc2c76b1b3da10 /text_collector_examples | |
parent | 29d4629f55603001e25fea65e8cb593b86e58f47 (diff) | |
download | prometheus_node_collector-1b98db9fa72abe93541fb1a7140388504601e303.tar.bz2 prometheus_node_collector-1b98db9fa72abe93541fb1a7140388504601e303.tar.xz prometheus_node_collector-1b98db9fa72abe93541fb1a7140388504601e303.zip |
textfile example storcli enhancements (#1145)
* storcli.py: Remove IntEnum
This removes an external dependency.
Moved VD state to VD info labels
* storcli.py: Fix BBU health detection
BBU Status is 0 for a healthy cache vault and 32 for a healthy BBU.
* storcli.py: Strip all strings from PD
Strip all strings that we get from PDs.
They often contain whitespace....
* storcli.py: Add formatting options
Add help text explaining how this document was formatted
* storcli.py: Add DG to pd_info label
Add disk group to pd_info.
That way we can relate to PDs in the same DG.
For example to check if all disks in one RAID
use the same interface...
* storcli.py: Fix promtool issues
Fix linting issues reported by promtool check-metrics
* storcli.py: Exit if storcli reports issues
storcli reports if the command was a success.
We should not continue if there are issues.
* storcli.py: Try to parse metrics to float
This will sanitize the values we hand over to
node_exporter - eliminating any unforeseen values we read out...
* storcli.py: Refactor code to implement handle_sas_controller()
Move code into methods so that we can now also support HBA queries.
* storcli.py: Sort inputs
"...like a good python developer"
- Daniel Swarbrick
* storcli.py: Replace external dateutil library with internal datetime
Removes external dependency...
* storcli.py: Also collect temperature on megaraid cards
We have already collected them on mpt3sas cards...
* storcli.py: Clean up old code
Removed dead code that is not used any more.
* storcli.py: strip() all information for labels
They often contain whitespace...
* storcli.py: Try to catch KeyErrors generally
If some key we expect is not there, we will want to
still print whatever we have collected so far...
* storcli.py: Increment version number
We have made some changes here and there.
The general look of the data has not been changed.
* storcli.py: Fix CodeSpell issue
Split string to avoid issues with Codespell due to Celcius in JSON Key
Signed-off-by: Christopher Blum <zeichenanonym@web.de>
Diffstat (limited to 'text_collector_examples')
-rwxr-xr-x | text_collector_examples/storcli.py | 214 |
1 files changed, 122 insertions, 92 deletions
diff --git a/text_collector_examples/storcli.py b/text_collector_examples/storcli.py index 48e2bba..65fc00f 100755 --- a/text_collector_examples/storcli.py +++ b/text_collector_examples/storcli.py | |||
@@ -12,21 +12,23 @@ Advanced Software Options (ASO) not exposed as metrics currently. | |||
12 | 12 | ||
13 | JSON key abbreviations used by StorCLI are documented in the standard command | 13 | JSON key abbreviations used by StorCLI are documented in the standard command |
14 | output, i.e. when you omit the trailing 'J' from the command. | 14 | output, i.e. when you omit the trailing 'J' from the command. |
15 | |||
16 | Formatting done with YAPF: | ||
17 | $ yapf -i --style '{COLUMN_LIMIT: 99}' storcli.py | ||
15 | """ | 18 | """ |
16 | 19 | ||
17 | from __future__ import print_function | 20 | from __future__ import print_function |
21 | from datetime import datetime | ||
18 | import argparse | 22 | import argparse |
23 | import collections | ||
19 | import json | 24 | import json |
20 | import os | 25 | import os |
21 | import subprocess | ||
22 | import shlex | 26 | import shlex |
23 | from dateutil.parser import parse | 27 | import subprocess |
24 | import collections | ||
25 | from enum import IntEnum | ||
26 | 28 | ||
27 | DESCRIPTION = """Parses StorCLI's JSON output and exposes MegaRAID health as | 29 | DESCRIPTION = """Parses StorCLI's JSON output and exposes MegaRAID health as |
28 | Prometheus metrics.""" | 30 | Prometheus metrics.""" |
29 | VERSION = '0.0.2' | 31 | VERSION = '0.0.3' |
30 | 32 | ||
31 | storcli_path = '' | 33 | storcli_path = '' |
32 | metric_prefix = 'megaraid_' | 34 | metric_prefix = 'megaraid_' |
@@ -34,59 +36,55 @@ metric_list = {} | |||
34 | metric_list = collections.defaultdict(list) | 36 | metric_list = collections.defaultdict(list) |
35 | 37 | ||
36 | 38 | ||
37 | class VD_State(IntEnum): | ||
38 | Optl = 0 # Optimal | ||
39 | Dgrd = 1 # Degraded | ||
40 | Pdgd = 2 # Partially Degraded | ||
41 | OfLn = 3 # Offline | ||
42 | Rec = 4 # Recovery | ||
43 | Cac = 5 # CacheCade | ||
44 | |||
45 | |||
46 | def main(args): | 39 | def main(args): |
47 | """ main """ | 40 | """ main """ |
48 | global storcli_path | 41 | global storcli_path |
49 | storcli_path = args.storcli_path | 42 | storcli_path = args.storcli_path |
50 | data = json.loads(get_storcli_json('/cALL show all J')) | 43 | data = get_storcli_json('/cALL show all J') |
51 | 44 | ||
52 | # All the information is collected underneath the Controllers key | 45 | try: |
53 | data = data['Controllers'] | 46 | # All the information is collected underneath the Controllers key |
54 | 47 | data = data['Controllers'] | |
55 | # try: | 48 | |
56 | # overview = status['Response Data']['System Overview'] | 49 | for controller in data: |
57 | # except KeyError: | 50 | response = controller['Response Data'] |
58 | # pass | 51 | if response['Version']['Driver Name'] == 'megaraid_sas': |
59 | 52 | handle_megaraid_controller(response) | |
60 | for controller in data: | 53 | elif response['Version']['Driver Name'] == 'mpt3sas': |
61 | response = controller['Response Data'] | 54 | handle_sas_controller(response) |
62 | if response['Version']['Driver Name'] == 'megaraid_sas': | 55 | except KeyError: |
63 | handle_megaraid_controller(response) | 56 | pass |
64 | elif response['Version']['Driver Name'] == 'mpt3sas': | 57 | |
65 | handle_sas_controller(response) | ||
66 | |||
67 | # print_dict_to_exporter({'controller_info': [1]}, controller_info_list) | ||
68 | # print_dict_to_exporter({'virtual_disk_info': [1]}, vd_info_list) | ||
69 | # print_dict_to_exporter({'physical_disk_info': [1]}, pd_info_list) | ||
70 | # print_all_metrics(vd_metric_list) | ||
71 | print_all_metrics(metric_list) | 58 | print_all_metrics(metric_list) |
72 | 59 | ||
73 | 60 | ||
74 | def handle_sas_controller(response): | 61 | def handle_sas_controller(response): |
75 | pass | 62 | (controller_index, baselabel) = get_basic_controller_info(response) |
63 | add_metric('healthy', baselabel, int(response['Status']['Controller Status'] == 'OK')) | ||
64 | add_metric('ports', baselabel, response['HwCfg']['Backend Port Count']) | ||
65 | try: | ||
66 | # The number of physical disks is half of the number of items in this dict | ||
67 | # Every disk is listed twice - once for basic info, again for detailed info | ||
68 | add_metric('physical_drives', baselabel, | ||
69 | len(response['Physical Device Information'].keys()) / 2) | ||
70 | except AttributeError: | ||
71 | pass | ||
72 | # Split up string to not trigger CodeSpell issues | ||
73 | add_metric('temperature', baselabel, | ||
74 | int(response['HwCfg']['ROC temperature(Degree Celc' + 'ius)'])) | ||
75 | for key, basic_disk_info in response['Physical Device Information'].items(): | ||
76 | if 'Detailed Information' in key: | ||
77 | continue | ||
78 | create_metrcis_of_physical_drive(basic_disk_info[0], | ||
79 | response['Physical Device Information'], controller_index) | ||
76 | 80 | ||
77 | 81 | ||
78 | def handle_megaraid_controller(response): | 82 | def handle_megaraid_controller(response): |
79 | controller_index = response['Basics']['Controller'] | 83 | (controller_index, baselabel) = get_basic_controller_info(response) |
80 | baselabel = 'controller="{}"'.format(controller_index) | ||
81 | 84 | ||
82 | controller_info_label = baselabel + ',model="{}",serial="{}",fwversion="{}"'.format( | 85 | # BBU Status Optimal value is 0 for cachevault and 32 for BBU |
83 | response['Basics']['Model'], | 86 | add_metric('battery_backup_healthy', baselabel, |
84 | response['Basics']['Serial Number'], | 87 | int(response['Status']['BBU Status'] in [0, 32])) |
85 | response['Version']['Firmware Version'], | ||
86 | ) | ||
87 | add_metric('controller_info', controller_info_label, 1) | ||
88 | |||
89 | add_metric('battery_backup_healthy', baselabel, int(response['Status']['BBU Status'] == 0)) | ||
90 | add_metric('degraded', baselabel, int(response['Status']['Controller Status'] == 'Degraded')) | 88 | add_metric('degraded', baselabel, int(response['Status']['Controller Status'] == 'Degraded')) |
91 | add_metric('failed', baselabel, int(response['Status']['Controller Status'] == 'Failed')) | 89 | add_metric('failed', baselabel, int(response['Status']['Controller Status'] == 'Failed')) |
92 | add_metric('healthy', baselabel, int(response['Status']['Controller Status'] == 'Optimal')) | 90 | add_metric('healthy', baselabel, int(response['Status']['Controller Status'] == 'Optimal')) |
@@ -96,10 +94,13 @@ def handle_megaraid_controller(response): | |||
96 | add_metric('ports', baselabel, response['HwCfg']['Backend Port Count']) | 94 | add_metric('ports', baselabel, response['HwCfg']['Backend Port Count']) |
97 | add_metric('scheduled_patrol_read', baselabel, | 95 | add_metric('scheduled_patrol_read', baselabel, |
98 | int('hrs' in response['Scheduled Tasks']['Patrol Read Reoccurrence'])) | 96 | int('hrs' in response['Scheduled Tasks']['Patrol Read Reoccurrence'])) |
97 | add_metric('temperature', baselabel, int(response['HwCfg']['ROC temperature(Degree Celsius)'])) | ||
99 | 98 | ||
100 | time_difference_seconds = -1 | 99 | time_difference_seconds = -1 |
101 | system_time = parse(response['Basics'].get('Current System Date/time')) | 100 | system_time = datetime.strptime(response['Basics'].get('Current System Date/time'), |
102 | controller_time = parse(response['Basics'].get('Current Controller Date/Time')) | 101 | "%m/%d/%Y, %H:%M:%S") |
102 | controller_time = datetime.strptime(response['Basics'].get('Current Controller Date/Time'), | ||
103 | "%m/%d/%Y, %H:%M:%S") | ||
103 | if system_time and controller_time: | 104 | if system_time and controller_time: |
104 | time_difference_seconds = abs(system_time - controller_time).seconds | 105 | time_difference_seconds = abs(system_time - controller_time).seconds |
105 | add_metric('time_difference', baselabel, time_difference_seconds) | 106 | add_metric('time_difference', baselabel, time_difference_seconds) |
@@ -112,58 +113,84 @@ def handle_megaraid_controller(response): | |||
112 | volume_group = vd_position.split('/')[1] | 113 | volume_group = vd_position.split('/')[1] |
113 | vd_baselabel = 'controller="{}",DG="{}",VG="{}"'.format(controller_index, drive_group, | 114 | vd_baselabel = 'controller="{}",DG="{}",VG="{}"'.format(controller_index, drive_group, |
114 | volume_group) | 115 | volume_group) |
115 | vd_info_label = vd_baselabel + ',name="{}",cache="{}",type="{}"'.format( | 116 | vd_info_label = vd_baselabel + ',name="{}",cache="{}",type="{}",state="{}"'.format( |
116 | virtual_drive.get('Name'), virtual_drive.get('Cache'), virtual_drive.get('TYPE')) | 117 | str(virtual_drive.get('Name')).strip(), |
118 | str(virtual_drive.get('Cache')).strip(), | ||
119 | str(virtual_drive.get('TYPE')).strip(), | ||
120 | str(virtual_drive.get('State')).strip()) | ||
117 | add_metric('vd_info', vd_info_label, 1) | 121 | add_metric('vd_info', vd_info_label, 1) |
118 | add_metric('vd_status', vd_baselabel, int(VD_State[virtual_drive.get('State')])) | ||
119 | 122 | ||
120 | if response['Physical Drives'] > 0: | 123 | if response['Physical Drives'] > 0: |
121 | data = json.loads(get_storcli_json('/cALL/eALL/sALL show all J')) | 124 | data = get_storcli_json('/cALL/eALL/sALL show all J') |
122 | drive_info = data['Controllers'][controller_index]['Response Data'] | 125 | drive_info = data['Controllers'][controller_index]['Response Data'] |
123 | for physical_drive in response['PD LIST']: | 126 | for physical_drive in response['PD LIST']: |
124 | enclosure = physical_drive.get('EID:Slt').split(':')[0] | 127 | create_metrcis_of_physical_drive(physical_drive, drive_info, controller_index) |
125 | slot = physical_drive.get('EID:Slt').split(':')[1] | 128 | |
126 | 129 | ||
127 | pd_baselabel = 'controller="{}",enclosure="{}",slot="{}"'.format( | 130 | def get_basic_controller_info(response): |
128 | controller_index, enclosure, slot) | 131 | controller_index = response['Basics']['Controller'] |
129 | pd_info_label = pd_baselabel + ',disk_id="{}",interface="{}",media="{}",model="{}"'.format( | 132 | baselabel = 'controller="{}"'.format(controller_index) |
130 | physical_drive.get('DID'), physical_drive.get('Intf'), physical_drive.get('Med'), | 133 | |
131 | physical_drive.get('Model').strip()) | 134 | controller_info_label = baselabel + ',model="{}",serial="{}",fwversion="{}"'.format( |
132 | 135 | str(response['Basics']['Model']).strip(), | |
133 | drive_identifier = 'Drive /c' + str(controller_index) + '/e' + str(enclosure) + '/s' + str( | 136 | str(response['Basics']['Serial Number']).strip(), |
134 | slot) | 137 | str(response['Version']['Firmware Version']).strip(), |
135 | try: | 138 | ) |
136 | info = drive_info[drive_identifier + ' - Detailed Information'] | 139 | add_metric('controller_info', controller_info_label, 1) |
137 | state = info[drive_identifier + ' State'] | 140 | |
138 | attributes = info[drive_identifier + ' Device attributes'] | 141 | return (controller_index, baselabel) |
139 | settings = info[drive_identifier + ' Policies/Settings'] | 142 | |
140 | 143 | ||
141 | add_metric('pd_shield_counter', pd_baselabel, state['Shield Counter']) | 144 | def create_metrcis_of_physical_drive(physical_drive, detailed_info_array, controller_index): |
142 | add_metric('pd_media_errors_total', pd_baselabel, state['Media Error Count']) | 145 | enclosure = physical_drive.get('EID:Slt').split(':')[0] |
143 | add_metric('pd_other_errors_total', pd_baselabel, state['Other Error Count']) | 146 | slot = physical_drive.get('EID:Slt').split(':')[1] |
144 | add_metric('pd_predictive_errors_total', pd_baselabel, | 147 | |
145 | state['Predictive Failure Count']) | 148 | pd_baselabel = 'controller="{}",enclosure="{}",slot="{}"'.format(controller_index, enclosure, |
146 | add_metric('pd_smart_alerted', pd_baselabel, | 149 | slot) |
147 | int(state['S.M.A.R.T alert flagged by drive'] == 'Yes')) | 150 | pd_info_label = pd_baselabel + \ |
148 | add_metric('pd_link_speed_gbps', pd_baselabel, attributes['Link Speed'].split('.')[0]) | 151 | ',disk_id="{}",interface="{}",media="{}",model="{}",DG="{}"'.format( |
149 | add_metric('pd_device_speed_gbps', pd_baselabel, | 152 | str(physical_drive.get('DID')).strip(), |
150 | attributes['Device Speed'].split('.')[0]) | 153 | str(physical_drive.get('Intf')).strip(), |
151 | add_metric('pd_commissioned_spare', pd_baselabel, | 154 | str(physical_drive.get('Med')).strip(), |
152 | int(settings['Commissioned Spare'] == 'Yes')) | 155 | str(physical_drive.get('Model')).strip(), |
153 | add_metric('pd_emergency_spare', pd_baselabel, | 156 | str(physical_drive.get('DG')).strip()) |
154 | int(settings['Emergency Spare'] == 'Yes')) | 157 | |
155 | pd_info_label += ',firmware="{}"'.format(attributes['Firmware Revision']) | 158 | drive_identifier = 'Drive /c' + str(controller_index) + '/e' + str(enclosure) + '/s' + str( |
156 | except KeyError: | 159 | slot) |
157 | pass | 160 | if enclosure == ' ': |
158 | add_metric('pd_info', pd_info_label, 1) | 161 | drive_identifier = 'Drive /c' + str(controller_index) + '/s' + str(slot) |
162 | try: | ||
163 | info = detailed_info_array[drive_identifier + ' - Detailed Information'] | ||
164 | state = info[drive_identifier + ' State'] | ||
165 | attributes = info[drive_identifier + ' Device attributes'] | ||
166 | settings = info[drive_identifier + ' Policies/Settings'] | ||
167 | |||
168 | add_metric('pd_shield_counter', pd_baselabel, state['Shield Counter']) | ||
169 | add_metric('pd_media_errors', pd_baselabel, state['Media Error Count']) | ||
170 | add_metric('pd_other_errors', pd_baselabel, state['Other Error Count']) | ||
171 | add_metric('pd_predictive_errors', pd_baselabel, state['Predictive Failure Count']) | ||
172 | add_metric('pd_smart_alerted', pd_baselabel, | ||
173 | int(state['S.M.A.R.T alert flagged by drive'] == 'Yes')) | ||
174 | add_metric('pd_link_speed_gbps', pd_baselabel, attributes['Link Speed'].split('.')[0]) | ||
175 | add_metric('pd_device_speed_gbps', pd_baselabel, attributes['Device Speed'].split('.')[0]) | ||
176 | add_metric('pd_commissioned_spare', pd_baselabel, | ||
177 | int(settings['Commissioned Spare'] == 'Yes')) | ||
178 | add_metric('pd_emergency_spare', pd_baselabel, int(settings['Emergency Spare'] == 'Yes')) | ||
179 | pd_info_label += ',firmware="{}"'.format(attributes['Firmware Revision'].strip()) | ||
180 | except KeyError: | ||
181 | pass | ||
182 | add_metric('pd_info', pd_info_label, 1) | ||
159 | 183 | ||
160 | 184 | ||
161 | def add_metric(name, labels, value): | 185 | def add_metric(name, labels, value): |
162 | global metric_list | 186 | global metric_list |
163 | metric_list[name].append({ | 187 | try: |
164 | 'labels': labels, | 188 | metric_list[name].append({ |
165 | 'value': value, | 189 | 'labels': labels, |
166 | }) | 190 | 'value': float(value), |
191 | }) | ||
192 | except ValueError: | ||
193 | pass | ||
167 | 194 | ||
168 | 195 | ||
169 | def print_all_metrics(metrics): | 196 | def print_all_metrics(metrics): |
@@ -184,8 +211,11 @@ def get_storcli_json(storcli_args): | |||
184 | proc = subprocess.Popen( | 211 | proc = subprocess.Popen( |
185 | storcli_cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 212 | storcli_cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
186 | output_json = proc.communicate()[0] | 213 | output_json = proc.communicate()[0] |
214 | data = json.loads(output_json.decode("utf-8")) | ||
187 | 215 | ||
188 | return output_json.decode("utf-8") | 216 | if data["Controllers"][0]["Command Status"]["Status"] != "Success": |
217 | SystemExit(1) | ||
218 | return data | ||
189 | 219 | ||
190 | 220 | ||
191 | if __name__ == "__main__": | 221 | if __name__ == "__main__": |