aboutsummaryrefslogtreecommitdiff
path: root/collector
diff options
context:
space:
mode:
authorPeter Bueschel <petersemailkonto@googlemail.com>2020-03-31 10:46:32 +0200
committerGitHub <noreply@github.com>2020-03-31 10:46:32 +0200
commitda5972b5398cd67a8854e6b1ee6b861bfda5ef83 (patch)
tree1fd34052702883fc048d715328b7fcfe7c7f6d95 /collector
parent4891b01b6c9ea01864ba01b8d0e3ddd5b7e0d369 (diff)
downloadprometheus_node_collector-da5972b5398cd67a8854e6b1ee6b861bfda5ef83.tar.bz2
prometheus_node_collector-da5972b5398cd67a8854e6b1ee6b861bfda5ef83.tar.xz
prometheus_node_collector-da5972b5398cd67a8854e6b1ee6b861bfda5ef83.zip
Add gauges for allocated memory for queued UDP and TCP packages (#1503)
* Two new states will be added to the tcpstat collector called rx_queued_bytes and tx_queued_bytes. For UDP datagrams an additional collector 'udp_queues' can be used to expose the total lengths of the tx_queue and rx_queue. @SuperQ and @discordianfish this changes gives us the option to check for overloaded UDP + TCP processing. The names of the new TCP states and the UDP metric can be discussed. The current reasons are just: I don't want to add another collector for the same exposed file, so I just added the new states to the tcpstat collector. I chose the name 'udp_queue' instead of 'udpstat' as UDP has no state. Signed-off-by: Peter Bueschel <peter.bueschel@logmein.com>
Diffstat (limited to 'collector')
-rw-r--r--collector/fixtures/e2e-output.txt5
-rw-r--r--collector/fixtures/proc/net/tcpstat4
-rw-r--r--collector/fixtures/proc/net/udp2
-rw-r--r--collector/tcpstat_linux.go28
-rw-r--r--collector/tcpstat_linux_test.go64
-rw-r--r--collector/udp_queues_linux.go87
6 files changed, 186 insertions, 4 deletions
diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt
index 907b420..5a2ba25 100644
--- a/collector/fixtures/e2e-output.txt
+++ b/collector/fixtures/e2e-output.txt
@@ -2644,6 +2644,7 @@ node_scrape_collector_success{collector="softnet"} 1
2644node_scrape_collector_success{collector="stat"} 1 2644node_scrape_collector_success{collector="stat"} 1
2645node_scrape_collector_success{collector="textfile"} 1 2645node_scrape_collector_success{collector="textfile"} 1
2646node_scrape_collector_success{collector="thermal_zone"} 1 2646node_scrape_collector_success{collector="thermal_zone"} 1
2647node_scrape_collector_success{collector="udp_queues"} 1
2647node_scrape_collector_success{collector="vmstat"} 1 2648node_scrape_collector_success{collector="vmstat"} 1
2648node_scrape_collector_success{collector="wifi"} 1 2649node_scrape_collector_success{collector="wifi"} 1
2649node_scrape_collector_success{collector="xfs"} 1 2650node_scrape_collector_success{collector="xfs"} 1
@@ -2734,6 +2735,10 @@ node_textfile_scrape_error 0
2734# HELP node_thermal_zone_temp Zone temperature in Celsius 2735# HELP node_thermal_zone_temp Zone temperature in Celsius
2735# TYPE node_thermal_zone_temp gauge 2736# TYPE node_thermal_zone_temp gauge
2736node_thermal_zone_temp{type="cpu-thermal",zone="0"} 12.376 2737node_thermal_zone_temp{type="cpu-thermal",zone="0"} 12.376
2738# HELP node_udp_queues Number of allocated memory in the kernel for UDP datagrams in bytes.
2739# TYPE node_udp_queues gauge
2740node_udp_queues{ip="v4",queue="rx"} 0
2741node_udp_queues{ip="v4",queue="tx"} 21
2737# HELP node_vmstat_oom_kill /proc/vmstat information field oom_kill. 2742# HELP node_vmstat_oom_kill /proc/vmstat information field oom_kill.
2738# TYPE node_vmstat_oom_kill untyped 2743# TYPE node_vmstat_oom_kill untyped
2739node_vmstat_oom_kill 0 2744node_vmstat_oom_kill 0
diff --git a/collector/fixtures/proc/net/tcpstat b/collector/fixtures/proc/net/tcpstat
index 8b3777a..352c00b 100644
--- a/collector/fixtures/proc/net/tcpstat
+++ b/collector/fixtures/proc/net/tcpstat
@@ -1,3 +1,3 @@
1 sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode 1 sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode
2 0: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 2740 1 ffff88003d3af3c0 100 0 0 10 0 2 0: 00000000:0016 00000000:0000 0A 00000015:00000000 00:00000000 00000000 0 0 2740 1 ffff88003d3af3c0 100 0 0 10 0
3 1: 0F02000A:0016 0202000A:8B6B 01 00000000:00000000 02:000AC99B 00000000 0 0 3652 4 ffff88003d3ae040 21 4 31 47 46 3 1: 0F02000A:0016 0202000A:8B6B 01 00000015:00000001 02:000AC99B 00000000 0 0 3652 4 ffff88003d3ae040 21 4 31 47 46
diff --git a/collector/fixtures/proc/net/udp b/collector/fixtures/proc/net/udp
new file mode 100644
index 0000000..3c50524
--- /dev/null
+++ b/collector/fixtures/proc/net/udp
@@ -0,0 +1,2 @@
1 sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode
2 0: 00000000:0016 00000000:0000 0A 00000015:00000000 00:00000000 00000000 0 0 2740 1 ffff88003d3af3c0 100 0 0 10 0
diff --git a/collector/tcpstat_linux.go b/collector/tcpstat_linux.go
index 9b09e9a..db9c655 100644
--- a/collector/tcpstat_linux.go
+++ b/collector/tcpstat_linux.go
@@ -52,6 +52,10 @@ const (
52 tcpListen 52 tcpListen
53 // TCP_CLOSING 53 // TCP_CLOSING
54 tcpClosing 54 tcpClosing
55 // TCP_RX_BUFFER
56 tcpRxQueuedBytes
57 // TCP_TX_BUFFER
58 tcpTxQueuedBytes
55) 59)
56 60
57type tcpStatCollector struct { 61type tcpStatCollector struct {
@@ -122,16 +126,34 @@ func parseTCPStats(r io.Reader) (map[tcpConnectionState]float64, error) {
122 if len(parts) == 0 { 126 if len(parts) == 0 {
123 continue 127 continue
124 } 128 }
125 if len(parts) < 4 { 129 if len(parts) < 5 {
126 return nil, fmt.Errorf("invalid TCP stats line: %q", line) 130 return nil, fmt.Errorf("invalid TCP stats line: %q", line)
127 } 131 }
128 132
133 qu := strings.Split(parts[4], ":")
134 if len(qu) < 2 {
135 return nil, fmt.Errorf("cannot parse tx_queues and rx_queues: %q", line)
136 }
137
138 tx, err := strconv.ParseUint(qu[0], 16, 64)
139 if err != nil {
140 return nil, err
141 }
142 tcpStats[tcpConnectionState(tcpTxQueuedBytes)] += float64(tx)
143
144 rx, err := strconv.ParseUint(qu[1], 16, 64)
145 if err != nil {
146 return nil, err
147 }
148 tcpStats[tcpConnectionState(tcpRxQueuedBytes)] += float64(rx)
149
129 st, err := strconv.ParseInt(parts[3], 16, 8) 150 st, err := strconv.ParseInt(parts[3], 16, 8)
130 if err != nil { 151 if err != nil {
131 return nil, err 152 return nil, err
132 } 153 }
133 154
134 tcpStats[tcpConnectionState(st)]++ 155 tcpStats[tcpConnectionState(st)]++
156
135 } 157 }
136 158
137 return tcpStats, nil 159 return tcpStats, nil
@@ -161,6 +183,10 @@ func (st tcpConnectionState) String() string {
161 return "listen" 183 return "listen"
162 case tcpClosing: 184 case tcpClosing:
163 return "closing" 185 return "closing"
186 case tcpRxQueuedBytes:
187 return "rx_queued_bytes"
188 case tcpTxQueuedBytes:
189 return "tx_queued_bytes"
164 default: 190 default:
165 return "unknown" 191 return "unknown"
166 } 192 }
diff --git a/collector/tcpstat_linux_test.go b/collector/tcpstat_linux_test.go
index f4c3b36..b609b84 100644
--- a/collector/tcpstat_linux_test.go
+++ b/collector/tcpstat_linux_test.go
@@ -28,8 +28,27 @@ func Test_parseTCPStatsError(t *testing.T) {
28 name: "too few fields", 28 name: "too few fields",
29 in: "sl local_address\n 0: 00000000:0016", 29 in: "sl local_address\n 0: 00000000:0016",
30 }, 30 },
31 {
32 name: "missing colon in tx-rx field",
33 in: "sl local_address rem_address st tx_queue rx_queue\n" +
34 " 1: 0F02000A:0016 0202000A:8B6B 01 0000000000000001",
35 },
36 {
37 name: "tx parsing issue",
38 in: "sl local_address rem_address st tx_queue rx_queue\n" +
39 " 1: 0F02000A:0016 0202000A:8B6B 01 0000000x:00000001",
40 },
41 {
42 name: "rx parsing issue",
43 in: "sl local_address rem_address st tx_queue rx_queue\n" +
44 " 1: 0F02000A:0016 0202000A:8B6B 01 00000000:0000000x",
45 },
46 {
47 name: "state parsing issue",
48 in: "sl local_address rem_address st tx_queue rx_queue\n" +
49 " 1: 0F02000A:0016 0202000A:8B6B 0H 00000000:00000001",
50 },
31 } 51 }
32
33 for _, tt := range tests { 52 for _, tt := range tests {
34 t.Run(tt.name, func(t *testing.T) { 53 t.Run(tt.name, func(t *testing.T) {
35 if _, err := parseTCPStats(strings.NewReader(tt.in)); err == nil { 54 if _, err := parseTCPStats(strings.NewReader(tt.in)); err == nil {
@@ -40,6 +59,14 @@ func Test_parseTCPStatsError(t *testing.T) {
40} 59}
41 60
42func TestTCPStat(t *testing.T) { 61func TestTCPStat(t *testing.T) {
62
63 noFile, _ := os.Open("follow the white rabbit")
64 defer noFile.Close()
65
66 if _, err := parseTCPStats(noFile); err == nil {
67 t.Fatal("expected an error, but none occurred")
68 }
69
43 file, err := os.Open("fixtures/proc/net/tcpstat") 70 file, err := os.Open("fixtures/proc/net/tcpstat")
44 if err != nil { 71 if err != nil {
45 t.Fatal(err) 72 t.Fatal(err)
@@ -58,4 +85,39 @@ func TestTCPStat(t *testing.T) {
58 if want, got := 1, int(tcpStats[tcpListen]); want != got { 85 if want, got := 1, int(tcpStats[tcpListen]); want != got {
59 t.Errorf("want tcpstat number of listen state %d, got %d", want, got) 86 t.Errorf("want tcpstat number of listen state %d, got %d", want, got)
60 } 87 }
88
89 if want, got := 42, int(tcpStats[tcpTxQueuedBytes]); want != got {
90 t.Errorf("want tcpstat number of bytes in tx queue %d, got %d", want, got)
91 }
92 if want, got := 1, int(tcpStats[tcpRxQueuedBytes]); want != got {
93 t.Errorf("want tcpstat number of bytes in rx queue %d, got %d", want, got)
94 }
95
96}
97
98func Test_getTCPStats(t *testing.T) {
99 type args struct {
100 statsFile string
101 }
102 tests := []struct {
103 name string
104 args args
105 wantErr bool
106 }{
107 {
108 name: "file not found",
109 args: args{statsFile: "somewhere over the rainbow"},
110 wantErr: true,
111 },
112 }
113 for _, tt := range tests {
114 t.Run(tt.name, func(t *testing.T) {
115 _, err := getTCPStats(tt.args.statsFile)
116 if (err != nil) != tt.wantErr {
117 t.Errorf("getTCPStats() error = %v, wantErr %v", err, tt.wantErr)
118 return
119 }
120 // other cases are covered by TestTCPStat()
121 })
122 }
61} 123}
diff --git a/collector/udp_queues_linux.go b/collector/udp_queues_linux.go
new file mode 100644
index 0000000..512c010
--- /dev/null
+++ b/collector/udp_queues_linux.go
@@ -0,0 +1,87 @@
1// Copyright 2015 The Prometheus Authors
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14// +build !noudp_queues
15
16package collector
17
18import (
19 "fmt"
20 "os"
21
22 "github.com/go-kit/kit/log"
23 "github.com/go-kit/kit/log/level"
24 "github.com/prometheus/client_golang/prometheus"
25 "github.com/prometheus/procfs"
26)
27
28type (
29 udpQueuesCollector struct {
30 fs procfs.FS
31 desc *prometheus.Desc
32 logger log.Logger
33 }
34)
35
36func init() {
37 registerCollector("udp_queues", defaultEnabled, NewUDPqueuesCollector)
38}
39
40// NewUDPqueuesCollector returns a new Collector exposing network udp queued bytes.
41func NewUDPqueuesCollector(logger log.Logger) (Collector, error) {
42 fs, err := procfs.NewFS(*procPath)
43 if err != nil {
44 return nil, fmt.Errorf("failed to open procfs: %v", err)
45 }
46 return &udpQueuesCollector{
47 fs: fs,
48 desc: prometheus.NewDesc(
49 prometheus.BuildFQName(namespace, "udp", "queues"),
50 "Number of allocated memory in the kernel for UDP datagrams in bytes.",
51 []string{"queue", "ip"}, nil,
52 ),
53 logger: logger,
54 }, nil
55}
56
57func (c *udpQueuesCollector) Update(ch chan<- prometheus.Metric) error {
58
59 s4, errIPv4 := c.fs.NetUDPSummary()
60 if errIPv4 == nil {
61 ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.TxQueueLength), "tx", "v4")
62 ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.RxQueueLength), "rx", "v4")
63 } else {
64 if os.IsNotExist(errIPv4) {
65 level.Debug(c.logger).Log("msg", "not collecting ipv4 based metrics")
66 } else {
67 return fmt.Errorf("couldn't get upd queued bytes: %s", errIPv4)
68 }
69 }
70
71 s6, errIPv6 := c.fs.NetUDP6Summary()
72 if errIPv6 == nil {
73 ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.TxQueueLength), "tx", "v6")
74 ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.RxQueueLength), "rx", "v6")
75 } else {
76 if os.IsNotExist(errIPv6) {
77 level.Debug(c.logger).Log("msg", "not collecting ipv6 based metrics")
78 } else {
79 return fmt.Errorf("couldn't get upd6 queued bytes: %s", errIPv6)
80 }
81 }
82
83 if os.IsNotExist(errIPv4) && os.IsNotExist(errIPv6) {
84 return ErrNoData
85 }
86 return nil
87}