diff options
author | Benjamin Drung <benjamin.drung@cloud.ionos.com> | 2019-11-22 22:52:17 +0100 |
---|---|---|
committer | Ben Kochie <superq@gmail.com> | 2019-11-22 13:52:17 -0800 |
commit | 04fbcfffa194b5a77c8de60ca38d2530a8dfeae3 (patch) | |
tree | 9670bf34a090444bc388b5d20b9ac46a8de80334 /collector | |
parent | 8b7df09d016dd282c99a29bbfd0157cbf16f0356 (diff) | |
download | prometheus_node_collector-04fbcfffa194b5a77c8de60ca38d2530a8dfeae3.tar.bz2 prometheus_node_collector-04fbcfffa194b5a77c8de60ca38d2530a8dfeae3.tar.xz prometheus_node_collector-04fbcfffa194b5a77c8de60ca38d2530a8dfeae3.zip |
Collect InfiniBand port state and physical state (#1357)

Collect the InfiniBand port state, the physical state, and the maximum
signal transfer rate.

Signed-off-by: Benjamin Drung <benjamin.drung@cloud.ionos.com>
Diffstat (limited to 'collector')
-rw-r--r-- | collector/fixtures/e2e-64k-page-output.txt | 15 | ||||
-rw-r--r-- | collector/fixtures/e2e-output.txt | 15 | ||||
-rw-r--r-- | collector/infiniband_linux.go | 7 |
3 files changed, 37 insertions, 0 deletions
diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 729deff..9f75f08 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt | |||
@@ -840,6 +840,11 @@ node_infiniband_multicast_packets_received_total{device="mlx4_0",port="2"} 0 | |||
840 | # TYPE node_infiniband_multicast_packets_transmitted_total counter | 840 | # TYPE node_infiniband_multicast_packets_transmitted_total counter |
841 | node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16 | 841 | node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16 |
842 | node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0 | 842 | node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0 |
843 | # HELP node_infiniband_physical_state_id Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest) | ||
844 | # TYPE node_infiniband_physical_state_id gauge | ||
845 | node_infiniband_physical_state_id{device="i40iw0",port="1"} 5 | ||
846 | node_infiniband_physical_state_id{device="mlx4_0",port="1"} 5 | ||
847 | node_infiniband_physical_state_id{device="mlx4_0",port="2"} 5 | ||
843 | # HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded | 848 | # HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded |
844 | # TYPE node_infiniband_port_constraint_errors_received_total counter | 849 | # TYPE node_infiniband_port_constraint_errors_received_total counter |
845 | node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0 | 850 | node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0 |
@@ -872,6 +877,16 @@ node_infiniband_port_packets_transmitted_total{device="mlx4_0",port="1"} 6.23586 | |||
872 | # HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick | 877 | # HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick |
873 | # TYPE node_infiniband_port_transmit_wait_total counter | 878 | # TYPE node_infiniband_port_transmit_wait_total counter |
874 | node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09 | 879 | node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09 |
880 | # HELP node_infiniband_rate_bytes_per_second Maximum signal transfer rate | ||
881 | # TYPE node_infiniband_rate_bytes_per_second gauge | ||
882 | node_infiniband_rate_bytes_per_second{device="i40iw0",port="1"} 1.25e+09 | ||
883 | node_infiniband_rate_bytes_per_second{device="mlx4_0",port="1"} 5e+09 | ||
884 | node_infiniband_rate_bytes_per_second{device="mlx4_0",port="2"} 5e+09 | ||
885 | # HELP node_infiniband_state_id State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer) | ||
886 | # TYPE node_infiniband_state_id gauge | ||
887 | node_infiniband_state_id{device="i40iw0",port="1"} 4 | ||
888 | node_infiniband_state_id{device="mlx4_0",port="1"} 4 | ||
889 | node_infiniband_state_id{device="mlx4_0",port="2"} 4 | ||
875 | # HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors) | 890 | # HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors) |
876 | # TYPE node_infiniband_unicast_packets_received_total counter | 891 | # TYPE node_infiniband_unicast_packets_received_total counter |
877 | node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148 | 892 | node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148 |
diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 41129c4..c163346 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt | |||
@@ -840,6 +840,11 @@ node_infiniband_multicast_packets_received_total{device="mlx4_0",port="2"} 0 | |||
840 | # TYPE node_infiniband_multicast_packets_transmitted_total counter | 840 | # TYPE node_infiniband_multicast_packets_transmitted_total counter |
841 | node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16 | 841 | node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="1"} 16 |
842 | node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0 | 842 | node_infiniband_multicast_packets_transmitted_total{device="mlx4_0",port="2"} 0 |
843 | # HELP node_infiniband_physical_state_id Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest) | ||
844 | # TYPE node_infiniband_physical_state_id gauge | ||
845 | node_infiniband_physical_state_id{device="i40iw0",port="1"} 5 | ||
846 | node_infiniband_physical_state_id{device="mlx4_0",port="1"} 5 | ||
847 | node_infiniband_physical_state_id{device="mlx4_0",port="2"} 5 | ||
843 | # HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded | 848 | # HELP node_infiniband_port_constraint_errors_received_total Number of packets received on the switch physical port that are discarded |
844 | # TYPE node_infiniband_port_constraint_errors_received_total counter | 849 | # TYPE node_infiniband_port_constraint_errors_received_total counter |
845 | node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0 | 850 | node_infiniband_port_constraint_errors_received_total{device="mlx4_0",port="1"} 0 |
@@ -872,6 +877,16 @@ node_infiniband_port_packets_transmitted_total{device="mlx4_0",port="1"} 6.23586 | |||
872 | # HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick | 877 | # HELP node_infiniband_port_transmit_wait_total Number of ticks during which the port had data to transmit but no data was sent during the entire tick |
873 | # TYPE node_infiniband_port_transmit_wait_total counter | 878 | # TYPE node_infiniband_port_transmit_wait_total counter |
874 | node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09 | 879 | node_infiniband_port_transmit_wait_total{device="mlx4_0",port="1"} 4.294967295e+09 |
880 | # HELP node_infiniband_rate_bytes_per_second Maximum signal transfer rate | ||
881 | # TYPE node_infiniband_rate_bytes_per_second gauge | ||
882 | node_infiniband_rate_bytes_per_second{device="i40iw0",port="1"} 1.25e+09 | ||
883 | node_infiniband_rate_bytes_per_second{device="mlx4_0",port="1"} 5e+09 | ||
884 | node_infiniband_rate_bytes_per_second{device="mlx4_0",port="2"} 5e+09 | ||
885 | # HELP node_infiniband_state_id State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer) | ||
886 | # TYPE node_infiniband_state_id gauge | ||
887 | node_infiniband_state_id{device="i40iw0",port="1"} 4 | ||
888 | node_infiniband_state_id{device="mlx4_0",port="1"} 4 | ||
889 | node_infiniband_state_id{device="mlx4_0",port="2"} 4 | ||
875 | # HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors) | 890 | # HELP node_infiniband_unicast_packets_received_total Number of unicast packets received (including errors) |
876 | # TYPE node_infiniband_unicast_packets_received_total counter | 891 | # TYPE node_infiniband_unicast_packets_received_total counter |
877 | node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148 | 892 | node_infiniband_unicast_packets_received_total{device="mlx4_0",port="1"} 61148 |
diff --git a/collector/infiniband_linux.go b/collector/infiniband_linux.go index 237a913..828e40e 100644 --- a/collector/infiniband_linux.go +++ b/collector/infiniband_linux.go | |||
@@ -57,6 +57,7 @@ func NewInfiniBandCollector() (Collector, error) { | |||
57 | "link_error_recovery_total": "Number of times the link successfully recovered from an error state", | 57 | "link_error_recovery_total": "Number of times the link successfully recovered from an error state", |
58 | "multicast_packets_received_total": "Number of multicast packets received (including errors)", | 58 | "multicast_packets_received_total": "Number of multicast packets received (including errors)", |
59 | "multicast_packets_transmitted_total": "Number of multicast packets transmitted (including errors)", | 59 | "multicast_packets_transmitted_total": "Number of multicast packets transmitted (including errors)", |
60 | "physical_state_id": "Physical state of the InfiniBand port (0: no change, 1: sleep, 2: polling, 3: disable, 4: shift, 5: link up, 6: link error recover, 7: phytest)", | ||
60 | "port_constraint_errors_received_total": "Number of packets received on the switch physical port that are discarded", | 61 | "port_constraint_errors_received_total": "Number of packets received on the switch physical port that are discarded", |
61 | "port_constraint_errors_transmitted_total": "Number of packets not transmitted from the switch physical port", | 62 | "port_constraint_errors_transmitted_total": "Number of packets not transmitted from the switch physical port", |
62 | "port_data_received_bytes_total": "Number of data octets received on all links", | 63 | "port_data_received_bytes_total": "Number of data octets received on all links", |
@@ -67,6 +68,8 @@ func NewInfiniBandCollector() (Collector, error) { | |||
67 | "port_packets_received_total": "Number of packets received on all VLs by this port (including errors)", | 68 | "port_packets_received_total": "Number of packets received on all VLs by this port (including errors)", |
68 | "port_packets_transmitted_total": "Number of packets transmitted on all VLs from this port (including errors)", | 69 | "port_packets_transmitted_total": "Number of packets transmitted on all VLs from this port (including errors)", |
69 | "port_transmit_wait_total": "Number of ticks during which the port had data to transmit but no data was sent during the entire tick", | 70 | "port_transmit_wait_total": "Number of ticks during which the port had data to transmit but no data was sent during the entire tick", |
71 | "rate_bytes_per_second": "Maximum signal transfer rate", | ||
72 | "state_id": "State of the InfiniBand port (0: no change, 1: down, 2: init, 3: armed, 4: active, 5: act defer)", | ||
70 | "unicast_packets_received_total": "Number of unicast packets received (including errors)", | 73 | "unicast_packets_received_total": "Number of unicast packets received (including errors)", |
71 | "unicast_packets_transmitted_total": "Number of unicast packets transmitted (including errors)", | 74 | "unicast_packets_transmitted_total": "Number of unicast packets transmitted (including errors)", |
72 | } | 75 | } |
@@ -105,6 +108,10 @@ func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error { | |||
105 | for _, port := range device.Ports { | 108 | for _, port := range device.Ports { |
106 | portStr := strconv.FormatUint(uint64(port.Port), 10) | 109 | portStr := strconv.FormatUint(uint64(port.Port), 10) |
107 | 110 | ||
111 | c.pushMetric(ch, "state_id", uint64(port.StateID), port.Name, portStr, prometheus.GaugeValue) | ||
112 | c.pushMetric(ch, "physical_state_id", uint64(port.PhysStateID), port.Name, portStr, prometheus.GaugeValue) | ||
113 | c.pushMetric(ch, "rate_bytes_per_second", port.Rate, port.Name, portStr, prometheus.GaugeValue) | ||
114 | |||
108 | c.pushCounter(ch, "legacy_multicast_packets_received_total", port.Counters.LegacyPortMulticastRcvPackets, port.Name, portStr) | 115 | c.pushCounter(ch, "legacy_multicast_packets_received_total", port.Counters.LegacyPortMulticastRcvPackets, port.Name, portStr) |
109 | c.pushCounter(ch, "legacy_multicast_packets_transmitted_total", port.Counters.LegacyPortMulticastXmitPackets, port.Name, portStr) | 116 | c.pushCounter(ch, "legacy_multicast_packets_transmitted_total", port.Counters.LegacyPortMulticastXmitPackets, port.Name, portStr) |
110 | c.pushCounter(ch, "legacy_data_received_bytes_total", port.Counters.LegacyPortRcvData64, port.Name, portStr) | 117 | c.pushCounter(ch, "legacy_data_received_bytes_total", port.Counters.LegacyPortRcvData64, port.Name, portStr) |