aboutsummaryrefslogtreecommitdiff
path: root/collector
diff options
context:
space:
mode:
author Benjamin Drung <benjamin.drung@cloud.ionos.com> 2019-11-22 22:52:17 +0100
committer Ben Kochie <superq@gmail.com> 2019-11-22 13:52:17 -0800
commit 04fbcfffa194b5a77c8de60ca38d2530a8dfeae3 (patch)
tree 9670bf34a090444bc388b5d20b9ac46a8de80334 /collector
parent 8b7df09d016dd282c99a29bbfd0157cbf16f0356 (diff)
download prometheus_node_collector-04fbcfffa194b5a77c8de60ca38d2530a8dfeae3.tar.bz2
prometheus_node_collector-04fbcfffa194b5a77c8de60ca38d2530a8dfeae3.tar.xz
prometheus_node_collector-04fbcfffa194b5a77c8de60ca38d2530a8dfeae3.zip
Collect InfiniBand port state and physical state (#1357)
Collect the InfiniBand port state, the physical state, and the maximum signal transfer rate.

Signed-off-by: Benjamin Drung <benjamin.drung@cloud.ionos.com>
Diffstat (limited to 'collector')
-rw-r--r-- collector/fixtures/e2e-64k-page-output.txt 15
-rw-r--r-- collector/fixtures/e2e-output.txt 15
-rw-r--r-- collector/infiniband_linux.go 7
3 files changed, 37 insertions, 0 deletions
diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt
index 729deff..9f75f08 100644
--- a/collector/fixtures/e2e-64k-page-output.txt
+++ b/collector/fixtures/e2e-64k-page-output.txt
@@ -840,6 +840,11 @@ node_infiniband_multicast_packets_received_total{device="mlx4_0",port="2"} 0
840# TYPE node_infiniband_multicast_packets_transmitted_total counter 840# TYPE node_infiniband_multicast_packets_transmitted_total counter
841node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16 841node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16
842node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0 842node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0
843# HELP node_infiniband_physical_state_id Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest)
844# TYPE node_infiniband_physical_state_id gauge
845node_infiniband_physical_state_id{device="i40iw0",port="1"} 5
846node_infiniband_physical_state_id{device="mlx4_0",port="1"} 5
847node_infiniband_physical_state_id{device="mlx4_0",port="2"} 5
843# HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded 848# HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded
844# TYPE node_infiniband_port_constraint_errors_received_total counter 849# TYPE node_infiniband_port_constraint_errors_received_total counter
845node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0 850node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0
@@ -872,6 +877,16 @@ node_infiniband_port_packets_transmitted_total{device="mlx4_0",port="1"} 6.23586
872# HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick 877# HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick
873# TYPE node_infiniband_port_transmit_wait_total counter 878# TYPE node_infiniband_port_transmit_wait_total counter
874node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09 879node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09
880# HELP node_infiniband_rate_bytes_per_second Maximum signal transfer rate
881# TYPE node_infiniband_rate_bytes_per_second gauge
882node_infiniband_rate_bytes_per_second{device="i40iw0",port="1"} 1.25e+09
883node_infiniband_rate_bytes_per_second{device="mlx4_0",port="1"} 5e+09
884node_infiniband_rate_bytes_per_second{device="mlx4_0",port="2"} 5e+09
885# HELP node_infiniband_state_id State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)
886# TYPE node_infiniband_state_id gauge
887node_infiniband_state_id{device="i40iw0",port="1"} 4
888node_infiniband_state_id{device="mlx4_0",port="1"} 4
889node_infiniband_state_id{device="mlx4_0",port="2"} 4
875# HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors) 890# HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors)
876# TYPE node_infiniband_unicast_packets_received_total counter 891# TYPE node_infiniband_unicast_packets_received_total counter
877node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148 892node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148
diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt
index 41129c4..c163346 100644
--- a/collector/fixtures/e2e-output.txt
+++ b/collector/fixtures/e2e-output.txt
@@ -840,6 +840,11 @@ node_infiniband_multicast_packets_received_total{device="mlx4_0",port="2"} 0
840# TYPE node_infiniband_multicast_packets_transmitted_total counter 840# TYPE node_infiniband_multicast_packets_transmitted_total counter
841node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16 841node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16
842node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0 842node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0
843# HELP node_infiniband_physical_state_id Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest)
844# TYPE node_infiniband_physical_state_id gauge
845node_infiniband_physical_state_id{device="i40iw0",port="1"} 5
846node_infiniband_physical_state_id{device="mlx4_0",port="1"} 5
847node_infiniband_physical_state_id{device="mlx4_0",port="2"} 5
843# HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded 848# HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded
844# TYPE node_infiniband_port_constraint_errors_received_total counter 849# TYPE node_infiniband_port_constraint_errors_received_total counter
845node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0 850node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0
@@ -872,6 +877,16 @@ node_infiniband_port_packets_transmitted_total{device="mlx4_0",port="1"} 6.23586
872# HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick 877# HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick
873# TYPE node_infiniband_port_transmit_wait_total counter 878# TYPE node_infiniband_port_transmit_wait_total counter
874node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09 879node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09
880# HELP node_infiniband_rate_bytes_per_second Maximum signal transfer rate
881# TYPE node_infiniband_rate_bytes_per_second gauge
882node_infiniband_rate_bytes_per_second{device="i40iw0",port="1"} 1.25e+09
883node_infiniband_rate_bytes_per_second{device="mlx4_0",port="1"} 5e+09
884node_infiniband_rate_bytes_per_second{device="mlx4_0",port="2"} 5e+09
885# HELP node_infiniband_state_id State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)
886# TYPE node_infiniband_state_id gauge
887node_infiniband_state_id{device="i40iw0",port="1"} 4
888node_infiniband_state_id{device="mlx4_0",port="1"} 4
889node_infiniband_state_id{device="mlx4_0",port="2"} 4
875# HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors) 890# HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors)
876# TYPE node_infiniband_unicast_packets_received_total counter 891# TYPE node_infiniband_unicast_packets_received_total counter
877node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148 892node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148
diff --git a/collector/infiniband_linux.go b/collector/infiniband_linux.go
index 237a913..828e40e 100644
--- a/collector/infiniband_linux.go
+++ b/collector/infiniband_linux.go
@@ -57,6 +57,7 @@ func NewInfiniBandCollector() (Collector, error) {
57 "link_error_recovery_total": "Number of times the link successfully recovered from an error state", 57 "link_error_recovery_total": "Number of times the link successfully recovered from an error state",
58 "multicast_packets_received_total": "Number of multicast packets received (including errors)", 58 "multicast_packets_received_total": "Number of multicast packets received (including errors)",
59 "multicast_packets_transmitted_total": "Number of multicast packets transmitted (including errors)", 59 "multicast_packets_transmitted_total": "Number of multicast packets transmitted (including errors)",
60 "physical_state_id": "Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest)",
60 "port_constraint_errors_received_total": "Number of packets received on the switch physical port that are discarded", 61 "port_constraint_errors_received_total": "Number of packets received on the switch physical port that are discarded",
61 "port_constraint_errors_transmitted_total": "Number of packets not transmitted from the switch physical port", 62 "port_constraint_errors_transmitted_total": "Number of packets not transmitted from the switch physical port",
62 "port_data_received_bytes_total": "Number of data octets received on all links", 63 "port_data_received_bytes_total": "Number of data octets received on all links",
@@ -67,6 +68,8 @@ func NewInfiniBandCollector() (Collector, error) {
67 "port_packets_received_total": "Number of packets received on all VLs by this port (including errors)", 68 "port_packets_received_total": "Number of packets received on all VLs by this port (including errors)",
68 "port_packets_transmitted_total": "Number of packets transmitted on all VLs from this port (including errors)", 69 "port_packets_transmitted_total": "Number of packets transmitted on all VLs from this port (including errors)",
69 "port_transmit_wait_total": "Number of ticks during which the port had data to transmit but no data was sent during the entire tick", 70 "port_transmit_wait_total": "Number of ticks during which the port had data to transmit but no data was sent during the entire tick",
71 "rate_bytes_per_second": "Maximum signal transfer rate",
72 "state_id": "State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)",
70 "unicast_packets_received_total": "Number of unicast packets received (including errors)", 73 "unicast_packets_received_total": "Number of unicast packets received (including errors)",
71 "unicast_packets_transmitted_total": "Number of unicast packets transmitted (including errors)", 74 "unicast_packets_transmitted_total": "Number of unicast packets transmitted (including errors)",
72 } 75 }
@@ -105,6 +108,10 @@ func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error {
105 for _, port := range device.Ports { 108 for _, port := range device.Ports {
106 portStr := strconv.FormatUint(uint64(port.Port), 10) 109 portStr := strconv.FormatUint(uint64(port.Port), 10)
107 110
111 c.pushMetric(ch, "state_id", uint64(port.StateID), port.Name, portStr, prometheus.GaugeValue)
112 c.pushMetric(ch, "physical_state_id", uint64(port.PhysStateID), port.Name, portStr, prometheus.GaugeValue)
113 c.pushMetric(ch, "rate_bytes_per_second", port.Rate, port.Name, portStr, prometheus.GaugeValue)
114
108 c.pushCounter(ch, "legacy_multicast_packets_received_total", port.Counters.LegacyPortMulticastRcvPackets, port.Name, portStr) 115 c.pushCounter(ch, "legacy_multicast_packets_received_total", port.Counters.LegacyPortMulticastRcvPackets, port.Name, portStr)
109 c.pushCounter(ch, "legacy_multicast_packets_transmitted_total", port.Counters.LegacyPortMulticastXmitPackets, port.Name, portStr) 116 c.pushCounter(ch, "legacy_multicast_packets_transmitted_total", port.Counters.LegacyPortMulticastXmitPackets, port.Name, portStr)
110 c.pushCounter(ch, "legacy_data_received_bytes_total", port.Counters.LegacyPortRcvData64, port.Name, portStr) 117 c.pushCounter(ch, "legacy_data_received_bytes_total", port.Counters.LegacyPortRcvData64, port.Name, portStr)