diff options
author | Matt Layher <mdlayher@gmail.com> | 2019-11-25 14:41:38 -0500 |
---|---|---|
committer | Ben Kochie <superq@gmail.com> | 2019-11-25 13:41:38 -0600 |
commit | da6b66371f70822bc75303c79c85b80c46bcfae6 (patch) | |
tree | 09dc8c777daa4d93dbbff5597941ba5cc69a3138 /collector | |
parent | 3c2c4e7b3c473777b13e79cc84cc04e519fd3a9a (diff) | |
download | prometheus_node_collector-da6b66371f70822bc75303c79c85b80c46bcfae6.tar.bz2 prometheus_node_collector-da6b66371f70822bc75303c79c85b80c46bcfae6.tar.xz prometheus_node_collector-da6b66371f70822bc75303c79c85b80c46bcfae6.zip |
collector: reimplement sockstat collector with procfs (#1552)
* collector: reimplement sockstat collector with procfs
* collector: handle sockstat IPv4 disabled, debug logging
Signed-off-by: Matt Layher <mdlayher@gmail.com>
Diffstat (limited to 'collector')
-rw-r--r-- | collector/fixtures/e2e-output.txt | 20 | ||||
-rw-r--r-- | collector/fixtures/proc/net/sockstat6 | 5 | ||||
-rw-r--r-- | collector/fixtures/proc/net/sockstat_rhe4 | 5 | ||||
-rw-r--r-- | collector/sockstat_linux.go | 184 | ||||
-rw-r--r-- | collector/sockstat_linux_test.go | 59 |
5 files changed, 145 insertions, 128 deletions
diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 64be188..e915f92 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt | |||
@@ -2575,15 +2575,27 @@ node_scrape_collector_success{collector="vmstat"} 1 | |||
2575 | node_scrape_collector_success{collector="wifi"} 1 | 2575 | node_scrape_collector_success{collector="wifi"} 1 |
2576 | node_scrape_collector_success{collector="xfs"} 1 | 2576 | node_scrape_collector_success{collector="xfs"} 1 |
2577 | node_scrape_collector_success{collector="zfs"} 1 | 2577 | node_scrape_collector_success{collector="zfs"} 1 |
2578 | # HELP node_sockstat_FRAG6_inuse Number of FRAG6 sockets in state inuse. | ||
2579 | # TYPE node_sockstat_FRAG6_inuse gauge | ||
2580 | node_sockstat_FRAG6_inuse 0 | ||
2581 | # HELP node_sockstat_FRAG6_memory Number of FRAG6 sockets in state memory. | ||
2582 | # TYPE node_sockstat_FRAG6_memory gauge | ||
2583 | node_sockstat_FRAG6_memory 0 | ||
2578 | # HELP node_sockstat_FRAG_inuse Number of FRAG sockets in state inuse. | 2584 | # HELP node_sockstat_FRAG_inuse Number of FRAG sockets in state inuse. |
2579 | # TYPE node_sockstat_FRAG_inuse gauge | 2585 | # TYPE node_sockstat_FRAG_inuse gauge |
2580 | node_sockstat_FRAG_inuse 0 | 2586 | node_sockstat_FRAG_inuse 0 |
2581 | # HELP node_sockstat_FRAG_memory Number of FRAG sockets in state memory. | 2587 | # HELP node_sockstat_FRAG_memory Number of FRAG sockets in state memory. |
2582 | # TYPE node_sockstat_FRAG_memory gauge | 2588 | # TYPE node_sockstat_FRAG_memory gauge |
2583 | node_sockstat_FRAG_memory 0 | 2589 | node_sockstat_FRAG_memory 0 |
2590 | # HELP node_sockstat_RAW6_inuse Number of RAW6 sockets in state inuse. | ||
2591 | # TYPE node_sockstat_RAW6_inuse gauge | ||
2592 | node_sockstat_RAW6_inuse 1 | ||
2584 | # HELP node_sockstat_RAW_inuse Number of RAW sockets in state inuse. | 2593 | # HELP node_sockstat_RAW_inuse Number of RAW sockets in state inuse. |
2585 | # TYPE node_sockstat_RAW_inuse gauge | 2594 | # TYPE node_sockstat_RAW_inuse gauge |
2586 | node_sockstat_RAW_inuse 0 | 2595 | node_sockstat_RAW_inuse 0 |
2596 | # HELP node_sockstat_TCP6_inuse Number of TCP6 sockets in state inuse. | ||
2597 | # TYPE node_sockstat_TCP6_inuse gauge | ||
2598 | node_sockstat_TCP6_inuse 17 | ||
2587 | # HELP node_sockstat_TCP_alloc Number of TCP sockets in state alloc. | 2599 | # HELP node_sockstat_TCP_alloc Number of TCP sockets in state alloc. |
2588 | # TYPE node_sockstat_TCP_alloc gauge | 2600 | # TYPE node_sockstat_TCP_alloc gauge |
2589 | node_sockstat_TCP_alloc 17 | 2601 | node_sockstat_TCP_alloc 17 |
@@ -2602,6 +2614,12 @@ node_sockstat_TCP_orphan 0 | |||
2602 | # HELP node_sockstat_TCP_tw Number of TCP sockets in state tw. | 2614 | # HELP node_sockstat_TCP_tw Number of TCP sockets in state tw. |
2603 | # TYPE node_sockstat_TCP_tw gauge | 2615 | # TYPE node_sockstat_TCP_tw gauge |
2604 | node_sockstat_TCP_tw 4 | 2616 | node_sockstat_TCP_tw 4 |
2617 | # HELP node_sockstat_UDP6_inuse Number of UDP6 sockets in state inuse. | ||
2618 | # TYPE node_sockstat_UDP6_inuse gauge | ||
2619 | node_sockstat_UDP6_inuse 9 | ||
2620 | # HELP node_sockstat_UDPLITE6_inuse Number of UDPLITE6 sockets in state inuse. | ||
2621 | # TYPE node_sockstat_UDPLITE6_inuse gauge | ||
2622 | node_sockstat_UDPLITE6_inuse 0 | ||
2605 | # HELP node_sockstat_UDPLITE_inuse Number of UDPLITE sockets in state inuse. | 2623 | # HELP node_sockstat_UDPLITE_inuse Number of UDPLITE sockets in state inuse. |
2606 | # TYPE node_sockstat_UDPLITE_inuse gauge | 2624 | # TYPE node_sockstat_UDPLITE_inuse gauge |
2607 | node_sockstat_UDPLITE_inuse 0 | 2625 | node_sockstat_UDPLITE_inuse 0 |
@@ -2614,7 +2632,7 @@ node_sockstat_UDP_mem 0 | |||
2614 | # HELP node_sockstat_UDP_mem_bytes Number of UDP sockets in state mem_bytes. | 2632 | # HELP node_sockstat_UDP_mem_bytes Number of UDP sockets in state mem_bytes. |
2615 | # TYPE node_sockstat_UDP_mem_bytes gauge | 2633 | # TYPE node_sockstat_UDP_mem_bytes gauge |
2616 | node_sockstat_UDP_mem_bytes 0 | 2634 | node_sockstat_UDP_mem_bytes 0 |
2617 | # HELP node_sockstat_sockets_used Number of sockets sockets in state used. | 2635 | # HELP node_sockstat_sockets_used Number of IPv4 sockets in use. |
2618 | # TYPE node_sockstat_sockets_used gauge | 2636 | # TYPE node_sockstat_sockets_used gauge |
2619 | node_sockstat_sockets_used 229 | 2637 | node_sockstat_sockets_used 229 |
2620 | # HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read. | 2638 | # HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read. |
diff --git a/collector/fixtures/proc/net/sockstat6 b/collector/fixtures/proc/net/sockstat6 new file mode 100644 index 0000000..de5806d --- /dev/null +++ b/collector/fixtures/proc/net/sockstat6 | |||
@@ -0,0 +1,5 @@ | |||
1 | TCP6: inuse 17 | ||
2 | UDP6: inuse 9 | ||
3 | UDPLITE6: inuse 0 | ||
4 | RAW6: inuse 1 | ||
5 | FRAG6: inuse 0 memory 0 | ||
diff --git a/collector/fixtures/proc/net/sockstat_rhe4 b/collector/fixtures/proc/net/sockstat_rhe4 deleted file mode 100644 index 1e178f3..0000000 --- a/collector/fixtures/proc/net/sockstat_rhe4 +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | sockets: used 229 | ||
2 | TCP: inuse 4 orphan 0 tw 4 alloc 17 mem 1 | ||
3 | UDP: inuse 0 | ||
4 | RAW: inuse 0 | ||
5 | FRAG: inuse 0 memory 0 | ||
diff --git a/collector/sockstat_linux.go b/collector/sockstat_linux.go index 92847be..d6c4c72 100644 --- a/collector/sockstat_linux.go +++ b/collector/sockstat_linux.go | |||
@@ -16,14 +16,12 @@ | |||
16 | package collector | 16 | package collector |
17 | 17 | ||
18 | import ( | 18 | import ( |
19 | "bufio" | ||
20 | "fmt" | 19 | "fmt" |
21 | "io" | ||
22 | "os" | 20 | "os" |
23 | "strconv" | ||
24 | "strings" | ||
25 | 21 | ||
26 | "github.com/prometheus/client_golang/prometheus" | 22 | "github.com/prometheus/client_golang/prometheus" |
23 | "github.com/prometheus/common/log" | ||
24 | "github.com/prometheus/procfs" | ||
27 | ) | 25 | ) |
28 | 26 | ||
29 | const ( | 27 | const ( |
@@ -45,78 +43,138 @@ func NewSockStatCollector() (Collector, error) { | |||
45 | } | 43 | } |
46 | 44 | ||
47 | func (c *sockStatCollector) Update(ch chan<- prometheus.Metric) error { | 45 | func (c *sockStatCollector) Update(ch chan<- prometheus.Metric) error { |
48 | sockStats, err := getSockStats(procFilePath("net/sockstat")) | 46 | fs, err := procfs.NewFS(*procPath) |
49 | if err != nil { | 47 | if err != nil { |
50 | return fmt.Errorf("couldn't get sockstats: %s", err) | 48 | return fmt.Errorf("failed to open procfs: %v", err) |
51 | } | 49 | } |
52 | for protocol, protocolStats := range sockStats { | 50 | |
53 | for name, value := range protocolStats { | 51 | // If IPv4 and/or IPv6 are disabled on this kernel, handle it gracefully. |
54 | v, err := strconv.ParseFloat(value, 64) | 52 | stat4, err := fs.NetSockstat() |
55 | if err != nil { | 53 | switch { |
56 | return fmt.Errorf("invalid value %s in sockstats: %s", value, err) | 54 | case err == nil: |
57 | } | 55 | case os.IsNotExist(err): |
58 | ch <- prometheus.MustNewConstMetric( | 56 | log.Debug("IPv4 sockstat statistics not found, skipping") |
59 | prometheus.NewDesc( | 57 | default: |
60 | prometheus.BuildFQName(namespace, sockStatSubsystem, protocol+"_"+name), | 58 | return fmt.Errorf("failed to get IPv4 sockstat data: %v", err) |
61 | fmt.Sprintf("Number of %s sockets in state %s.", protocol, name), | ||
62 | nil, nil, | ||
63 | ), | ||
64 | prometheus.GaugeValue, v, | ||
65 | ) | ||
66 | } | ||
67 | } | 59 | } |
68 | return err | ||
69 | } | ||
70 | 60 | ||
71 | func getSockStats(fileName string) (map[string]map[string]string, error) { | 61 | stat6, err := fs.NetSockstat6() |
72 | file, err := os.Open(fileName) | 62 | switch { |
73 | if err != nil { | 63 | case err == nil: |
74 | return nil, err | 64 | case os.IsNotExist(err): |
65 | log.Debug("IPv6 sockstat statistics not found, skipping") | ||
66 | default: | ||
67 | return fmt.Errorf("failed to get IPv6 sockstat data: %v", err) | ||
68 | } | ||
69 | |||
70 | stats := []struct { | ||
71 | isIPv6 bool | ||
72 | stat *procfs.NetSockstat | ||
73 | }{ | ||
74 | { | ||
75 | stat: stat4, | ||
76 | }, | ||
77 | { | ||
78 | isIPv6: true, | ||
79 | stat: stat6, | ||
80 | }, | ||
75 | } | 81 | } |
76 | defer file.Close() | ||
77 | 82 | ||
78 | return parseSockStats(file, fileName) | 83 | for _, s := range stats { |
84 | c.update(ch, s.isIPv6, s.stat) | ||
85 | } | ||
86 | |||
87 | return nil | ||
79 | } | 88 | } |
80 | 89 | ||
81 | func parseSockStats(r io.Reader, fileName string) (map[string]map[string]string, error) { | 90 | func (c *sockStatCollector) update(ch chan<- prometheus.Metric, isIPv6 bool, s *procfs.NetSockstat) { |
82 | var ( | 91 | if s == nil { |
83 | sockStat = map[string]map[string]string{} | 92 | // IPv6 disabled or similar; nothing to do. |
84 | scanner = bufio.NewScanner(r) | 93 | return |
85 | ) | ||
86 | |||
87 | for scanner.Scan() { | ||
88 | line := strings.Split(scanner.Text(), " ") | ||
89 | // Remove trailing ':'. | ||
90 | protocol := line[0][:len(line[0])-1] | ||
91 | sockStat[protocol] = map[string]string{} | ||
92 | |||
93 | for i := 1; i < len(line) && i+1 < len(line); i++ { | ||
94 | sockStat[protocol][line[i]] = line[i+1] | ||
95 | i++ | ||
96 | } | ||
97 | } | 94 | } |
98 | if err := scanner.Err(); err != nil { | 95 | |
99 | return nil, err | 96 | // If sockstat contains the number of used sockets, export it. |
97 | if !isIPv6 && s.Used != nil { | ||
98 | // TODO: this must be updated if sockstat6 ever exports this data. | ||
99 | ch <- prometheus.MustNewConstMetric( | ||
100 | prometheus.NewDesc( | ||
101 | prometheus.BuildFQName(namespace, sockStatSubsystem, "sockets_used"), | ||
102 | "Number of IPv4 sockets in use.", | ||
103 | nil, | ||
104 | nil, | ||
105 | ), | ||
106 | prometheus.GaugeValue, | ||
107 | float64(*s.Used), | ||
108 | ) | ||
100 | } | 109 | } |
101 | 110 | ||
102 | // The mem metrics is the count of pages used. Multiply the mem metrics by | 111 | // A name and optional value for a sockstat metric. |
103 | // the page size from the kernel to get the number of bytes used. | 112 | type ssPair struct { |
104 | // | 113 | name string |
105 | // Update the TCP mem from page count to bytes. | 114 | v *int |
106 | pageCount, err := strconv.Atoi(sockStat["TCP"]["mem"]) | ||
107 | if err != nil { | ||
108 | return nil, fmt.Errorf("invalid value %s in sockstats: %s", sockStat["TCP"]["mem"], err) | ||
109 | } | 115 | } |
110 | sockStat["TCP"]["mem_bytes"] = strconv.Itoa(pageCount * pageSize) | ||
111 | 116 | ||
112 | // Update the UDP mem from page count to bytes. | 117 | // Previously these metric names were generated directly from the file output. |
113 | if udpMem := sockStat["UDP"]["mem"]; udpMem != "" { | 118 | // In order to keep the same level of compatibility, we must map the fields |
114 | pageCount, err = strconv.Atoi(udpMem) | 119 | // to their correct names. |
115 | if err != nil { | 120 | for _, p := range s.Protocols { |
116 | return nil, fmt.Errorf("invalid value %s in sockstats: %s", sockStat["UDP"]["mem"], err) | 121 | pairs := []ssPair{ |
122 | { | ||
123 | name: "inuse", | ||
124 | v: &p.InUse, | ||
125 | }, | ||
126 | { | ||
127 | name: "orphan", | ||
128 | v: p.Orphan, | ||
129 | }, | ||
130 | { | ||
131 | name: "tw", | ||
132 | v: p.TW, | ||
133 | }, | ||
134 | { | ||
135 | name: "alloc", | ||
136 | v: p.Alloc, | ||
137 | }, | ||
138 | { | ||
139 | name: "mem", | ||
140 | v: p.Mem, | ||
141 | }, | ||
142 | { | ||
143 | name: "memory", | ||
144 | v: p.Memory, | ||
145 | }, | ||
117 | } | 146 | } |
118 | sockStat["UDP"]["mem_bytes"] = strconv.Itoa(pageCount * pageSize) | ||
119 | } | ||
120 | 147 | ||
121 | return sockStat, nil | 148 | // Also export mem_bytes values for sockets which have a mem value |
149 | // stored in pages. | ||
150 | if p.Mem != nil { | ||
151 | v := *p.Mem * pageSize | ||
152 | pairs = append(pairs, ssPair{ | ||
153 | name: "mem_bytes", | ||
154 | v: &v, | ||
155 | }) | ||
156 | } | ||
157 | |||
158 | for _, pair := range pairs { | ||
159 | if pair.v == nil { | ||
160 | // This value is not set for this protocol; nothing to do. | ||
161 | continue | ||
162 | } | ||
163 | |||
164 | ch <- prometheus.MustNewConstMetric( | ||
165 | prometheus.NewDesc( | ||
166 | prometheus.BuildFQName( | ||
167 | namespace, | ||
168 | sockStatSubsystem, | ||
169 | fmt.Sprintf("%s_%s", p.Protocol, pair.name), | ||
170 | ), | ||
171 | fmt.Sprintf("Number of %s sockets in state %s.", p.Protocol, pair.name), | ||
172 | nil, | ||
173 | nil, | ||
174 | ), | ||
175 | prometheus.GaugeValue, | ||
176 | float64(*pair.v), | ||
177 | ) | ||
178 | } | ||
179 | } | ||
122 | } | 180 | } |
diff --git a/collector/sockstat_linux_test.go b/collector/sockstat_linux_test.go deleted file mode 100644 index 70bedba..0000000 --- a/collector/sockstat_linux_test.go +++ /dev/null | |||
@@ -1,59 +0,0 @@ | |||
1 | // Copyright 2015 The Prometheus Authors | ||
2 | // Licensed under the Apache License, Version 2.0 (the "License"); | ||
3 | // you may not use this file except in compliance with the License. | ||
4 | // You may obtain a copy of the License at | ||
5 | // | ||
6 | // http://www.apache.org/licenses/LICENSE-2.0 | ||
7 | // | ||
8 | // Unless required by applicable law or agreed to in writing, software | ||
9 | // distributed under the License is distributed on an "AS IS" BASIS, | ||
10 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
11 | // See the License for the specific language governing permissions and | ||
12 | // limitations under the License. | ||
13 | |||
14 | package collector | ||
15 | |||
16 | import ( | ||
17 | "os" | ||
18 | "strconv" | ||
19 | "testing" | ||
20 | ) | ||
21 | |||
22 | func TestSockStats(t *testing.T) { | ||
23 | testSockStats(t, "fixtures/proc/net/sockstat") | ||
24 | testSockStats(t, "fixtures/proc/net/sockstat_rhe4") | ||
25 | } | ||
26 | |||
27 | func testSockStats(t *testing.T, fixture string) { | ||
28 | file, err := os.Open(fixture) | ||
29 | if err != nil { | ||
30 | t.Fatal(err) | ||
31 | } | ||
32 | |||
33 | defer file.Close() | ||
34 | |||
35 | sockStats, err := parseSockStats(file, fixture) | ||
36 | if err != nil { | ||
37 | t.Fatal(err) | ||
38 | } | ||
39 | |||
40 | if want, got := "229", sockStats["sockets"]["used"]; want != got { | ||
41 | t.Errorf("want sockstat sockets used %s, got %s", want, got) | ||
42 | } | ||
43 | |||
44 | if want, got := "4", sockStats["TCP"]["tw"]; want != got { | ||
45 | t.Errorf("want sockstat TCP tw %s, got %s", want, got) | ||
46 | } | ||
47 | |||
48 | if want, got := "17", sockStats["TCP"]["alloc"]; want != got { | ||
49 | t.Errorf("want sockstat TCP alloc %s, got %s", want, got) | ||
50 | } | ||
51 | |||
52 | // The test file has 1 for TCP mem, which is one page. So we should get the | ||
53 | // page size in bytes back from sockstat_linux. We get the page size from | ||
54 | // os here because this value can change from system to system. The value is | ||
55 | // 4096 by default from linux 2.4 onward. | ||
56 | if want, got := strconv.Itoa(os.Getpagesize()), sockStats["TCP"]["mem_bytes"]; want != got { | ||
57 | t.Errorf("want sockstat TCP mem_bytes %s, got %s", want, got) | ||
58 | } | ||
59 | } | ||