diff options
author | Daniel Hodges <hodges.daniel.scott@gmail.com> | 2020-04-17 06:02:08 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-17 12:02:08 +0200 |
commit | b14168cf6ad2fd40bbe53b29eebea149dae31105 (patch) | |
tree | 49fb89f1426227e12750fc368118a13864da9bb9 | |
parent | 44357ed677f7845ab8d202bfc277f341b63e1fdc (diff) | |
download | prometheus_node_collector-b14168cf6ad2fd40bbe53b29eebea149dae31105.tar.bz2 prometheus_node_collector-b14168cf6ad2fd40bbe53b29eebea149dae31105.tar.xz prometheus_node_collector-b14168cf6ad2fd40bbe53b29eebea149dae31105.zip |
Add perf tracepoint collection flag (#1664)
* Add tracepoint collector option for perf collector
Signed-off-by: Daniel Hodges <hodges.daniel.scott@gmail.com>
-rw-r--r-- | README.md | 7 | ||||
-rw-r--r-- | collector/perf_linux.go | 185 | ||||
-rw-r--r-- | collector/perf_linux_test.go | 70 |
3 files changed, 245 insertions, 17 deletions
@@ -96,6 +96,13 @@ configuration is zero indexed and can also take a stride value; e.g. | |||
96 | `--collector.perf --collector.perf.cpus=1-10:5` would collect on CPUs | 96 | `--collector.perf --collector.perf.cpus=1-10:5` would collect on CPUs |
97 | 1, 5, and 10. | 97 | 1, 5, and 10. |
98 | 98 | ||
99 | The perf collector is also able to collect | ||
100 | [tracepoint](https://www.kernel.org/doc/html/latest/core-api/tracepoint.html) | ||
101 | counts when using the `--collector.perf.tracepoint` flag. Tracepoints can be | ||
102 | found using [`perf list`](http://man7.org/linux/man-pages/man1/perf.1.html) or | ||
103 | from debugfs. An example usage of this would be | ||
104 | `--collector.perf.tracepoint="sched:sched_process_exec"`. | ||
105 | |||
99 | 106 | ||
100 | Name | Description | OS | 107 | Name | Description | OS |
101 | ---------|-------------|---- | 108 | ---------|-------------|---- |
diff --git a/collector/perf_linux.go b/collector/perf_linux.go index 6d19683..e452754 100644 --- a/collector/perf_linux.go +++ b/collector/perf_linux.go | |||
@@ -20,8 +20,10 @@ import ( | |||
20 | "strings" | 20 | "strings" |
21 | 21 | ||
22 | "github.com/go-kit/kit/log" | 22 | "github.com/go-kit/kit/log" |
23 | "github.com/go-kit/kit/log/level" | ||
23 | "github.com/hodgesds/perf-utils" | 24 | "github.com/hodgesds/perf-utils" |
24 | "github.com/prometheus/client_golang/prometheus" | 25 | "github.com/prometheus/client_golang/prometheus" |
26 | "golang.org/x/sys/unix" | ||
25 | kingpin "gopkg.in/alecthomas/kingpin.v2" | 27 | kingpin "gopkg.in/alecthomas/kingpin.v2" |
26 | ) | 28 | ) |
27 | 29 | ||
@@ -30,27 +32,29 @@ const ( | |||
30 | ) | 32 | ) |
31 | 33 | ||
32 | var ( | 34 | var ( |
33 | perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String() | 35 | perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String() |
36 | perfTracepointFlag = kingpin.Flag("collector.perf.tracepoint", "perf tracepoint that should be collected").Strings() | ||
34 | ) | 37 | ) |
35 | 38 | ||
36 | func init() { | 39 | func init() { |
37 | registerCollector(perfSubsystem, defaultDisabled, NewPerfCollector) | 40 | registerCollector(perfSubsystem, defaultDisabled, NewPerfCollector) |
38 | } | 41 | } |
39 | 42 | ||
40 | // perfCollector is a Collector that uses the perf subsystem to collect | 43 | // perfTracepointFlagToTracepoints returns the set of configured tracepoints. |
41 | // metrics. It uses perf_event_open and ioctls for profiling. Due to the fact | 44 | func perfTracepointFlagToTracepoints(tracepointsFlag []string) ([]*perfTracepoint, error) { |
42 | // that the perf subsystem is highly dependent on kernel configuration and | 45 | tracepoints := make([]*perfTracepoint, len(tracepointsFlag)) |
43 | // settings not all profiler values may be exposed on the target system at any | 46 | |
44 | // given time. | 47 | for i, tracepoint := range tracepointsFlag { |
45 | type perfCollector struct { | 48 | split := strings.Split(tracepoint, ":") |
46 | hwProfilerCPUMap map[*perf.HardwareProfiler]int | 49 | if len(split) != 2 { |
47 | swProfilerCPUMap map[*perf.SoftwareProfiler]int | 50 | return nil, fmt.Errorf("Invalid tracepoint config %v", tracepoint) |
48 | cacheProfilerCPUMap map[*perf.CacheProfiler]int | 51 | } |
49 | perfHwProfilers map[int]*perf.HardwareProfiler | 52 | tracepoints[i] = &perfTracepoint{ |
50 | perfSwProfilers map[int]*perf.SoftwareProfiler | 53 | subsystem: split[0], |
51 | perfCacheProfilers map[int]*perf.CacheProfiler | 54 | event: split[1], |
52 | desc map[string]*prometheus.Desc | 55 | } |
53 | logger log.Logger | 56 | } |
57 | return tracepoints, nil | ||
54 | } | 58 | } |
55 | 59 | ||
56 | // perfCPUFlagToCPUs returns a set of CPUs for the perf collectors to monitor. | 60 | // perfCPUFlagToCPUs returns a set of CPUs for the perf collectors to monitor. |
@@ -98,6 +102,144 @@ func perfCPUFlagToCPUs(cpuFlag string) ([]int, error) { | |||
98 | return cpus, nil | 102 | return cpus, nil |
99 | } | 103 | } |
100 | 104 | ||
105 | // perfTracepoint is a struct for holding tracepoint information. | ||
106 | type perfTracepoint struct { | ||
107 | subsystem string | ||
108 | event string | ||
109 | } | ||
110 | |||
111 | // label returns the tracepoint name in the format of subsystem_tracepoint. | ||
112 | func (t *perfTracepoint) label() string { | ||
113 | return t.subsystem + "_" + t.event | ||
114 | } | ||
115 | |||
116 | // tracepoint returns the tracepoint name in the format of subsystem:tracepoint. | ||
117 | func (t *perfTracepoint) tracepoint() string { | ||
118 | return t.subsystem + ":" + t.event | ||
119 | } | ||
120 | |||
121 | // perfCollector is a Collector that uses the perf subsystem to collect | ||
122 | // metrics. It uses perf_event_open and ioctls for profiling. Due to the fact | ||
123 | // that the perf subsystem is highly dependent on kernel configuration and | ||
124 | // settings not all profiler values may be exposed on the target system at any | ||
125 | // given time. | ||
126 | type perfCollector struct { | ||
127 | hwProfilerCPUMap map[*perf.HardwareProfiler]int | ||
128 | swProfilerCPUMap map[*perf.SoftwareProfiler]int | ||
129 | cacheProfilerCPUMap map[*perf.CacheProfiler]int | ||
130 | perfHwProfilers map[int]*perf.HardwareProfiler | ||
131 | perfSwProfilers map[int]*perf.SoftwareProfiler | ||
132 | perfCacheProfilers map[int]*perf.CacheProfiler | ||
133 | desc map[string]*prometheus.Desc | ||
134 | logger log.Logger | ||
135 | tracepointCollector *perfTracepointCollector | ||
136 | } | ||
137 | |||
138 | type perfTracepointCollector struct { | ||
139 | // descs maps a tracepoint subsystem to its events, and each event to its *prometheus.Desc. | ||
140 | descs map[string]map[string]*prometheus.Desc | ||
141 | // collectionOrder lists the configured tracepoints (subsystem:event) in the order they were added to the group profiler. | ||
142 | collectionOrder []string | ||
143 | |||
144 | logger log.Logger | ||
145 | profilers map[int]perf.GroupProfiler | ||
146 | } | ||
147 | |||
148 | // update is used to collect all tracepoints across all tracepoint profilers. | ||
149 | func (c *perfTracepointCollector) update(ch chan<- prometheus.Metric) error { | ||
150 | for cpu := range c.profilers { | ||
151 | if err := c.updateCPU(cpu, ch); err != nil { | ||
152 | return err | ||
153 | } | ||
154 | } | ||
155 | return nil | ||
156 | } | ||
157 | |||
158 | // updateCPU is used to update metrics per CPU profiler. | ||
159 | func (c *perfTracepointCollector) updateCPU(cpu int, ch chan<- prometheus.Metric) error { | ||
160 | cpuStr := fmt.Sprintf("%d", cpu) | ||
161 | profiler := c.profilers[cpu] | ||
162 | p, err := profiler.Profile() | ||
163 | if err != nil { | ||
164 | level.Error(c.logger).Log("msg", "Failed to collect tracepoint profile", "err", err) | ||
165 | return err | ||
166 | } | ||
167 | |||
168 | for i, value := range p.Values { | ||
169 | // Get the Desc from the ordered group value. | ||
170 | descKey := c.collectionOrder[i] | ||
171 | descKeySlice := strings.Split(descKey, ":") | ||
172 | ch <- prometheus.MustNewConstMetric( | ||
173 | c.descs[descKeySlice[0]][descKeySlice[1]], | ||
174 | prometheus.CounterValue, | ||
175 | float64(value), | ||
176 | cpuStr, | ||
177 | ) | ||
178 | } | ||
179 | return nil | ||
180 | } | ||
181 | |||
182 | // newPerfTracepointCollector returns a configured perfTracepointCollector. | ||
183 | func newPerfTracepointCollector( | ||
184 | logger log.Logger, | ||
185 | tracepointsFlag []string, | ||
186 | cpus []int, | ||
187 | ) (*perfTracepointCollector, error) { | ||
188 | tracepoints, err := perfTracepointFlagToTracepoints(tracepointsFlag) | ||
189 | if err != nil { | ||
190 | return nil, err | ||
191 | } | ||
192 | |||
193 | collectionOrder := make([]string, len(tracepoints)) | ||
194 | descs := map[string]map[string]*prometheus.Desc{} | ||
195 | eventAttrs := make([]unix.PerfEventAttr, len(tracepoints)) | ||
196 | |||
197 | for i, tracepoint := range tracepoints { | ||
198 | eventAttr, err := perf.TracepointEventAttr(tracepoint.subsystem, tracepoint.event) | ||
199 | if err != nil { | ||
200 | return nil, err | ||
201 | } | ||
202 | eventAttrs[i] = *eventAttr | ||
203 | collectionOrder[i] = tracepoint.tracepoint() | ||
204 | if _, ok := descs[tracepoint.subsystem]; !ok { | ||
205 | descs[tracepoint.subsystem] = map[string]*prometheus.Desc{} | ||
206 | } | ||
207 | descs[tracepoint.subsystem][tracepoint.event] = prometheus.NewDesc( | ||
208 | prometheus.BuildFQName( | ||
209 | namespace, | ||
210 | perfSubsystem, | ||
211 | tracepoint.label(), | ||
212 | ), | ||
213 | "Perf tracepoint "+tracepoint.tracepoint(), | ||
214 | []string{"cpu"}, | ||
215 | nil, | ||
216 | ) | ||
217 | } | ||
218 | |||
219 | profilers := make(map[int]perf.GroupProfiler, len(cpus)) | ||
220 | for _, cpu := range cpus { | ||
221 | profiler, err := perf.NewGroupProfiler(-1, cpu, 0, eventAttrs...) | ||
222 | if err != nil { | ||
223 | return nil, err | ||
224 | } | ||
225 | profilers[cpu] = profiler | ||
226 | } | ||
227 | |||
228 | c := &perfTracepointCollector{ | ||
229 | descs: descs, | ||
230 | collectionOrder: collectionOrder, | ||
231 | profilers: profilers, | ||
232 | logger: logger, | ||
233 | } | ||
234 | |||
235 | for _, profiler := range c.profilers { | ||
236 | if err := profiler.Start(); err != nil { | ||
237 | return nil, err | ||
238 | } | ||
239 | } | ||
240 | return c, nil | ||
241 | } | ||
242 | |||
101 | // NewPerfCollector returns a new perf based collector, it creates a profiler | 243 | // NewPerfCollector returns a new perf based collector, it creates a profiler |
102 | // per CPU. | 244 | // per CPU. |
103 | func NewPerfCollector(logger log.Logger) (Collector, error) { | 245 | func NewPerfCollector(logger log.Logger) (Collector, error) { |
@@ -127,6 +269,16 @@ func NewPerfCollector(logger log.Logger) (Collector, error) { | |||
127 | } | 269 | } |
128 | } | 270 | } |
129 | 271 | ||
272 | // First configure any tracepoints. | ||
273 | if *perfTracepointFlag != nil && len(*perfTracepointFlag) > 0 { | ||
274 | tracepointCollector, err := newPerfTracepointCollector(logger, *perfTracepointFlag, cpus) | ||
275 | if err != nil { | ||
276 | return nil, err | ||
277 | } | ||
278 | collector.tracepointCollector = tracepointCollector | ||
279 | } | ||
280 | |||
281 | // Configure all profilers for the specified CPUs. | ||
130 | for _, cpu := range cpus { | 282 | for _, cpu := range cpus { |
131 | // Use -1 to profile all processes on the CPU, see: | 283 | // Use -1 to profile all processes on the CPU, see: |
132 | // man perf_event_open | 284 | // man perf_event_open |
@@ -411,6 +563,9 @@ func (c *perfCollector) Update(ch chan<- prometheus.Metric) error { | |||
411 | if err := c.updateCacheStats(ch); err != nil { | 563 | if err := c.updateCacheStats(ch); err != nil { |
412 | return err | 564 | return err |
413 | } | 565 | } |
566 | if c.tracepointCollector != nil { | ||
567 | return c.tracepointCollector.update(ch) | ||
568 | } | ||
414 | 569 | ||
415 | return nil | 570 | return nil |
416 | } | 571 | } |
diff --git a/collector/perf_linux_test.go b/collector/perf_linux_test.go index fca5455..b384a65 100644 --- a/collector/perf_linux_test.go +++ b/collector/perf_linux_test.go | |||
@@ -180,8 +180,74 @@ func TestPerfCPUFlagToCPUs(t *testing.T) { | |||
180 | if test.exCpus[i] != cpus[i] { | 180 | if test.exCpus[i] != cpus[i] { |
181 | t.Fatalf( | 181 | t.Fatalf( |
182 | "expected cpus %v, got %v", | 182 | "expected cpus %v, got %v", |
183 | test.exCpus, | 183 | test.exCpus[i], |
184 | cpus, | 184 | cpus[i], |
185 | ) | ||
186 | } | ||
187 | } | ||
188 | }) | ||
189 | } | ||
190 | } | ||
191 | |||
192 | func TestPerfTracepointFlagToTracepoints(t *testing.T) { | ||
193 | tests := []struct { | ||
194 | name string | ||
195 | flag []string | ||
196 | exTracepoints []*perfTracepoint | ||
197 | errStr string | ||
198 | }{ | ||
199 | { | ||
200 | name: "valid single tracepoint", | ||
201 | flag: []string{"sched:sched_kthread_stop"}, | ||
202 | exTracepoints: []*perfTracepoint{ | ||
203 | { | ||
204 | subsystem: "sched", | ||
205 | event: "sched_kthread_stop", | ||
206 | }, | ||
207 | }, | ||
208 | }, | ||
209 | { | ||
210 | name: "valid multiple tracepoints", | ||
211 | flag: []string{"sched:sched_kthread_stop", "sched:sched_process_fork"}, | ||
212 | exTracepoints: []*perfTracepoint{ | ||
213 | { | ||
214 | subsystem: "sched", | ||
215 | event: "sched_kthread_stop", | ||
216 | }, | ||
217 | { | ||
218 | subsystem: "sched", | ||
219 | event: "sched_process_fork", | ||
220 | }, | ||
221 | }, | ||
222 | }, | ||
223 | } | ||
224 | |||
225 | for _, test := range tests { | ||
226 | t.Run(test.name, func(t *testing.T) { | ||
227 | tracepoints, err := perfTracepointFlagToTracepoints(test.flag) | ||
228 | if test.errStr != "" { | ||
229 | if err != nil { | ||
230 | t.Fatal("expected error to not be nil") | ||
231 | } | ||
232 | if test.errStr != err.Error() { | ||
233 | t.Fatalf( | ||
234 | "expected error %q, got %q", | ||
235 | test.errStr, | ||
236 | err.Error(), | ||
237 | ) | ||
238 | } | ||
239 | return | ||
240 | } | ||
241 | if err != nil { | ||
242 | t.Fatal(err) | ||
243 | } | ||
244 | for i := range tracepoints { | ||
245 | if test.exTracepoints[i].event != tracepoints[i].event && | ||
246 | test.exTracepoints[i].subsystem != tracepoints[i].subsystem { | ||
247 | t.Fatalf( | ||
248 | "expected tracepoint %v, got %v", | ||
249 | test.exTracepoints[i], | ||
250 | tracepoints[i], | ||
185 | ) | 251 | ) |
186 | } | 252 | } |
187 | } | 253 | } |