diff options
author | Peter Bueschel <petersemailkonto@googlemail.com> | 2020-03-31 10:46:32 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-31 10:46:32 +0200 |
commit | da5972b5398cd67a8854e6b1ee6b861bfda5ef83 (patch) | |
tree | 1fd34052702883fc048d715328b7fcfe7c7f6d95 /collector | |
parent | 4891b01b6c9ea01864ba01b8d0e3ddd5b7e0d369 (diff) | |
download | prometheus_node_collector-da5972b5398cd67a8854e6b1ee6b861bfda5ef83.tar.bz2 prometheus_node_collector-da5972b5398cd67a8854e6b1ee6b861bfda5ef83.tar.xz prometheus_node_collector-da5972b5398cd67a8854e6b1ee6b861bfda5ef83.zip |
Add gauges for allocated memory for queued UDP and TCP packages (#1503)
* Two new states will be added to the tcpstat collector called rx_queued_bytes and tx_queued_bytes.
For UDP datagrams an additional collector 'udp_queues' can be used to expose the total lengths of the tx_queue and rx_queue.
@SuperQ and @discordianfish this changes gives us the option to check for overloaded UDP + TCP processing.
The names of the new TCP states and the UDP metric can be discussed.
The current reasons are just:
I don't want to add another collector for the same exposed file, so I just added the new states to the tcpstat collector.
I chose the name 'udp_queue' instead of 'udpstat' as UDP has no state.
Signed-off-by: Peter Bueschel <peter.bueschel@logmein.com>
Diffstat (limited to 'collector')
-rw-r--r-- | collector/fixtures/e2e-output.txt | 5 | ||||
-rw-r--r-- | collector/fixtures/proc/net/tcpstat | 4 | ||||
-rw-r--r-- | collector/fixtures/proc/net/udp | 2 | ||||
-rw-r--r-- | collector/tcpstat_linux.go | 28 | ||||
-rw-r--r-- | collector/tcpstat_linux_test.go | 64 | ||||
-rw-r--r-- | collector/udp_queues_linux.go | 87 |
6 files changed, 186 insertions, 4 deletions
diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 907b420..5a2ba25 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt | |||
@@ -2644,6 +2644,7 @@ node_scrape_collector_success{collector="softnet"} 1 | |||
2644 | node_scrape_collector_success{collector="stat"} 1 | 2644 | node_scrape_collector_success{collector="stat"} 1 |
2645 | node_scrape_collector_success{collector="textfile"} 1 | 2645 | node_scrape_collector_success{collector="textfile"} 1 |
2646 | node_scrape_collector_success{collector="thermal_zone"} 1 | 2646 | node_scrape_collector_success{collector="thermal_zone"} 1 |
2647 | node_scrape_collector_success{collector="udp_queues"} 1 | ||
2647 | node_scrape_collector_success{collector="vmstat"} 1 | 2648 | node_scrape_collector_success{collector="vmstat"} 1 |
2648 | node_scrape_collector_success{collector="wifi"} 1 | 2649 | node_scrape_collector_success{collector="wifi"} 1 |
2649 | node_scrape_collector_success{collector="xfs"} 1 | 2650 | node_scrape_collector_success{collector="xfs"} 1 |
@@ -2734,6 +2735,10 @@ node_textfile_scrape_error 0 | |||
2734 | # HELP node_thermal_zone_temp Zone temperature in Celsius | 2735 | # HELP node_thermal_zone_temp Zone temperature in Celsius |
2735 | # TYPE node_thermal_zone_temp gauge | 2736 | # TYPE node_thermal_zone_temp gauge |
2736 | node_thermal_zone_temp{type="cpu-thermal",zone="0"} 12.376 | 2737 | node_thermal_zone_temp{type="cpu-thermal",zone="0"} 12.376 |
2738 | # HELP node_udp_queues Number of allocated memory in the kernel for UDP datagrams in bytes. | ||
2739 | # TYPE node_udp_queues gauge | ||
2740 | node_udp_queues{ip="v4",queue="rx"} 0 | ||
2741 | node_udp_queues{ip="v4",queue="tx"} 21 | ||
2737 | # HELP node_vmstat_oom_kill /proc/vmstat information field oom_kill. | 2742 | # HELP node_vmstat_oom_kill /proc/vmstat information field oom_kill. |
2738 | # TYPE node_vmstat_oom_kill untyped | 2743 | # TYPE node_vmstat_oom_kill untyped |
2739 | node_vmstat_oom_kill 0 | 2744 | node_vmstat_oom_kill 0 |
diff --git a/collector/fixtures/proc/net/tcpstat b/collector/fixtures/proc/net/tcpstat index 8b3777a..352c00b 100644 --- a/collector/fixtures/proc/net/tcpstat +++ b/collector/fixtures/proc/net/tcpstat | |||
@@ -1,3 +1,3 @@ | |||
1 | sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode | 1 | sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode |
2 | 0: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 2740 1 ffff88003d3af3c0 100 0 0 10 0 | 2 | 0: 00000000:0016 00000000:0000 0A 00000015:00000000 00:00000000 00000000 0 0 2740 1 ffff88003d3af3c0 100 0 0 10 0 |
3 | 1: 0F02000A:0016 0202000A:8B6B 01 00000000:00000000 02:000AC99B 00000000 0 0 3652 4 ffff88003d3ae040 21 4 31 47 46 | 3 | 1: 0F02000A:0016 0202000A:8B6B 01 00000015:00000001 02:000AC99B 00000000 0 0 3652 4 ffff88003d3ae040 21 4 31 47 46 |
diff --git a/collector/fixtures/proc/net/udp b/collector/fixtures/proc/net/udp new file mode 100644 index 0000000..3c50524 --- /dev/null +++ b/collector/fixtures/proc/net/udp | |||
@@ -0,0 +1,2 @@ | |||
1 | sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode | ||
2 | 0: 00000000:0016 00000000:0000 0A 00000015:00000000 00:00000000 00000000 0 0 2740 1 ffff88003d3af3c0 100 0 0 10 0 | ||
diff --git a/collector/tcpstat_linux.go b/collector/tcpstat_linux.go index 9b09e9a..db9c655 100644 --- a/collector/tcpstat_linux.go +++ b/collector/tcpstat_linux.go | |||
@@ -52,6 +52,10 @@ const ( | |||
52 | tcpListen | 52 | tcpListen |
53 | // TCP_CLOSING | 53 | // TCP_CLOSING |
54 | tcpClosing | 54 | tcpClosing |
55 | // TCP_RX_BUFFER | ||
56 | tcpRxQueuedBytes | ||
57 | // TCP_TX_BUFFER | ||
58 | tcpTxQueuedBytes | ||
55 | ) | 59 | ) |
56 | 60 | ||
57 | type tcpStatCollector struct { | 61 | type tcpStatCollector struct { |
@@ -122,16 +126,34 @@ func parseTCPStats(r io.Reader) (map[tcpConnectionState]float64, error) { | |||
122 | if len(parts) == 0 { | 126 | if len(parts) == 0 { |
123 | continue | 127 | continue |
124 | } | 128 | } |
125 | if len(parts) < 4 { | 129 | if len(parts) < 5 { |
126 | return nil, fmt.Errorf("invalid TCP stats line: %q", line) | 130 | return nil, fmt.Errorf("invalid TCP stats line: %q", line) |
127 | } | 131 | } |
128 | 132 | ||
133 | qu := strings.Split(parts[4], ":") | ||
134 | if len(qu) < 2 { | ||
135 | return nil, fmt.Errorf("cannot parse tx_queues and rx_queues: %q", line) | ||
136 | } | ||
137 | |||
138 | tx, err := strconv.ParseUint(qu[0], 16, 64) | ||
139 | if err != nil { | ||
140 | return nil, err | ||
141 | } | ||
142 | tcpStats[tcpConnectionState(tcpTxQueuedBytes)] += float64(tx) | ||
143 | |||
144 | rx, err := strconv.ParseUint(qu[1], 16, 64) | ||
145 | if err != nil { | ||
146 | return nil, err | ||
147 | } | ||
148 | tcpStats[tcpConnectionState(tcpRxQueuedBytes)] += float64(rx) | ||
149 | |||
129 | st, err := strconv.ParseInt(parts[3], 16, 8) | 150 | st, err := strconv.ParseInt(parts[3], 16, 8) |
130 | if err != nil { | 151 | if err != nil { |
131 | return nil, err | 152 | return nil, err |
132 | } | 153 | } |
133 | 154 | ||
134 | tcpStats[tcpConnectionState(st)]++ | 155 | tcpStats[tcpConnectionState(st)]++ |
156 | |||
135 | } | 157 | } |
136 | 158 | ||
137 | return tcpStats, nil | 159 | return tcpStats, nil |
@@ -161,6 +183,10 @@ func (st tcpConnectionState) String() string { | |||
161 | return "listen" | 183 | return "listen" |
162 | case tcpClosing: | 184 | case tcpClosing: |
163 | return "closing" | 185 | return "closing" |
186 | case tcpRxQueuedBytes: | ||
187 | return "rx_queued_bytes" | ||
188 | case tcpTxQueuedBytes: | ||
189 | return "tx_queued_bytes" | ||
164 | default: | 190 | default: |
165 | return "unknown" | 191 | return "unknown" |
166 | } | 192 | } |
diff --git a/collector/tcpstat_linux_test.go b/collector/tcpstat_linux_test.go index f4c3b36..b609b84 100644 --- a/collector/tcpstat_linux_test.go +++ b/collector/tcpstat_linux_test.go | |||
@@ -28,8 +28,27 @@ func Test_parseTCPStatsError(t *testing.T) { | |||
28 | name: "too few fields", | 28 | name: "too few fields", |
29 | in: "sl local_address\n 0: 00000000:0016", | 29 | in: "sl local_address\n 0: 00000000:0016", |
30 | }, | 30 | }, |
31 | { | ||
32 | name: "missing colon in tx-rx field", | ||
33 | in: "sl local_address rem_address st tx_queue rx_queue\n" + | ||
34 | " 1: 0F02000A:0016 0202000A:8B6B 01 0000000000000001", | ||
35 | }, | ||
36 | { | ||
37 | name: "tx parsing issue", | ||
38 | in: "sl local_address rem_address st tx_queue rx_queue\n" + | ||
39 | " 1: 0F02000A:0016 0202000A:8B6B 01 0000000x:00000001", | ||
40 | }, | ||
41 | { | ||
42 | name: "rx parsing issue", | ||
43 | in: "sl local_address rem_address st tx_queue rx_queue\n" + | ||
44 | " 1: 0F02000A:0016 0202000A:8B6B 01 00000000:0000000x", | ||
45 | }, | ||
46 | { | ||
47 | name: "state parsing issue", | ||
48 | in: "sl local_address rem_address st tx_queue rx_queue\n" + | ||
49 | " 1: 0F02000A:0016 0202000A:8B6B 0H 00000000:00000001", | ||
50 | }, | ||
31 | } | 51 | } |
32 | |||
33 | for _, tt := range tests { | 52 | for _, tt := range tests { |
34 | t.Run(tt.name, func(t *testing.T) { | 53 | t.Run(tt.name, func(t *testing.T) { |
35 | if _, err := parseTCPStats(strings.NewReader(tt.in)); err == nil { | 54 | if _, err := parseTCPStats(strings.NewReader(tt.in)); err == nil { |
@@ -40,6 +59,14 @@ func Test_parseTCPStatsError(t *testing.T) { | |||
40 | } | 59 | } |
41 | 60 | ||
42 | func TestTCPStat(t *testing.T) { | 61 | func TestTCPStat(t *testing.T) { |
62 | |||
63 | noFile, _ := os.Open("follow the white rabbit") | ||
64 | defer noFile.Close() | ||
65 | |||
66 | if _, err := parseTCPStats(noFile); err == nil { | ||
67 | t.Fatal("expected an error, but none occurred") | ||
68 | } | ||
69 | |||
43 | file, err := os.Open("fixtures/proc/net/tcpstat") | 70 | file, err := os.Open("fixtures/proc/net/tcpstat") |
44 | if err != nil { | 71 | if err != nil { |
45 | t.Fatal(err) | 72 | t.Fatal(err) |
@@ -58,4 +85,39 @@ func TestTCPStat(t *testing.T) { | |||
58 | if want, got := 1, int(tcpStats[tcpListen]); want != got { | 85 | if want, got := 1, int(tcpStats[tcpListen]); want != got { |
59 | t.Errorf("want tcpstat number of listen state %d, got %d", want, got) | 86 | t.Errorf("want tcpstat number of listen state %d, got %d", want, got) |
60 | } | 87 | } |
88 | |||
89 | if want, got := 42, int(tcpStats[tcpTxQueuedBytes]); want != got { | ||
90 | t.Errorf("want tcpstat number of bytes in tx queue %d, got %d", want, got) | ||
91 | } | ||
92 | if want, got := 1, int(tcpStats[tcpRxQueuedBytes]); want != got { | ||
93 | t.Errorf("want tcpstat number of bytes in rx queue %d, got %d", want, got) | ||
94 | } | ||
95 | |||
96 | } | ||
97 | |||
98 | func Test_getTCPStats(t *testing.T) { | ||
99 | type args struct { | ||
100 | statsFile string | ||
101 | } | ||
102 | tests := []struct { | ||
103 | name string | ||
104 | args args | ||
105 | wantErr bool | ||
106 | }{ | ||
107 | { | ||
108 | name: "file not found", | ||
109 | args: args{statsFile: "somewhere over the rainbow"}, | ||
110 | wantErr: true, | ||
111 | }, | ||
112 | } | ||
113 | for _, tt := range tests { | ||
114 | t.Run(tt.name, func(t *testing.T) { | ||
115 | _, err := getTCPStats(tt.args.statsFile) | ||
116 | if (err != nil) != tt.wantErr { | ||
117 | t.Errorf("getTCPStats() error = %v, wantErr %v", err, tt.wantErr) | ||
118 | return | ||
119 | } | ||
120 | // other cases are covered by TestTCPStat() | ||
121 | }) | ||
122 | } | ||
61 | } | 123 | } |
diff --git a/collector/udp_queues_linux.go b/collector/udp_queues_linux.go new file mode 100644 index 0000000..512c010 --- /dev/null +++ b/collector/udp_queues_linux.go | |||
@@ -0,0 +1,87 @@ | |||
1 | // Copyright 2015 The Prometheus Authors | ||
2 | // Licensed under the Apache License, Version 2.0 (the "License"); | ||
3 | // you may not use this file except in compliance with the License. | ||
4 | // You may obtain a copy of the License at | ||
5 | // | ||
6 | // http://www.apache.org/licenses/LICENSE-2.0 | ||
7 | // | ||
8 | // Unless required by applicable law or agreed to in writing, software | ||
9 | // distributed under the License is distributed on an "AS IS" BASIS, | ||
10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
11 | // See the License for the specific language governing permissions and | ||
12 | // limitations under the License. | ||
13 | |||
14 | // +build !noudp_queues | ||
15 | |||
16 | package collector | ||
17 | |||
18 | import ( | ||
19 | "fmt" | ||
20 | "os" | ||
21 | |||
22 | "github.com/go-kit/kit/log" | ||
23 | "github.com/go-kit/kit/log/level" | ||
24 | "github.com/prometheus/client_golang/prometheus" | ||
25 | "github.com/prometheus/procfs" | ||
26 | ) | ||
27 | |||
28 | type ( | ||
29 | udpQueuesCollector struct { | ||
30 | fs procfs.FS | ||
31 | desc *prometheus.Desc | ||
32 | logger log.Logger | ||
33 | } | ||
34 | ) | ||
35 | |||
36 | func init() { | ||
37 | registerCollector("udp_queues", defaultEnabled, NewUDPqueuesCollector) | ||
38 | } | ||
39 | |||
40 | // NewUDPqueuesCollector returns a new Collector exposing network udp queued bytes. | ||
41 | func NewUDPqueuesCollector(logger log.Logger) (Collector, error) { | ||
42 | fs, err := procfs.NewFS(*procPath) | ||
43 | if err != nil { | ||
44 | return nil, fmt.Errorf("failed to open procfs: %v", err) | ||
45 | } | ||
46 | return &udpQueuesCollector{ | ||
47 | fs: fs, | ||
48 | desc: prometheus.NewDesc( | ||
49 | prometheus.BuildFQName(namespace, "udp", "queues"), | ||
50 | "Number of allocated memory in the kernel for UDP datagrams in bytes.", | ||
51 | []string{"queue", "ip"}, nil, | ||
52 | ), | ||
53 | logger: logger, | ||
54 | }, nil | ||
55 | } | ||
56 | |||
57 | func (c *udpQueuesCollector) Update(ch chan<- prometheus.Metric) error { | ||
58 | |||
59 | s4, errIPv4 := c.fs.NetUDPSummary() | ||
60 | if errIPv4 == nil { | ||
61 | ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.TxQueueLength), "tx", "v4") | ||
62 | ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.RxQueueLength), "rx", "v4") | ||
63 | } else { | ||
64 | if os.IsNotExist(errIPv4) { | ||
65 | level.Debug(c.logger).Log("msg", "not collecting ipv4 based metrics") | ||
66 | } else { | ||
67 | return fmt.Errorf("couldn't get upd queued bytes: %s", errIPv4) | ||
68 | } | ||
69 | } | ||
70 | |||
71 | s6, errIPv6 := c.fs.NetUDP6Summary() | ||
72 | if errIPv6 == nil { | ||
73 | ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.TxQueueLength), "tx", "v6") | ||
74 | ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.RxQueueLength), "rx", "v6") | ||
75 | } else { | ||
76 | if os.IsNotExist(errIPv6) { | ||
77 | level.Debug(c.logger).Log("msg", "not collecting ipv6 based metrics") | ||
78 | } else { | ||
79 | return fmt.Errorf("couldn't get upd6 queued bytes: %s", errIPv6) | ||
80 | } | ||
81 | } | ||
82 | |||
83 | if os.IsNotExist(errIPv4) && os.IsNotExist(errIPv6) { | ||
84 | return ErrNoData | ||
85 | } | ||
86 | return nil | ||
87 | } | ||