diff options
Diffstat (limited to 'collector/perf_linux.go')
-rw-r--r-- | collector/perf_linux.go | 165 |
1 files changed, 129 insertions, 36 deletions
diff --git a/collector/perf_linux.go b/collector/perf_linux.go index e8a52b4..b67f970 100644 --- a/collector/perf_linux.go +++ b/collector/perf_linux.go | |||
@@ -14,18 +14,25 @@ | |||
14 | package collector | 14 | package collector |
15 | 15 | ||
16 | import ( | 16 | import ( |
17 | "fmt" | ||
17 | "runtime" | 18 | "runtime" |
18 | "strconv" | 19 | "strconv" |
20 | "strings" | ||
19 | 21 | ||
20 | "github.com/go-kit/kit/log" | 22 | "github.com/go-kit/kit/log" |
21 | "github.com/hodgesds/perf-utils" | 23 | "github.com/hodgesds/perf-utils" |
22 | "github.com/prometheus/client_golang/prometheus" | 24 | "github.com/prometheus/client_golang/prometheus" |
25 | kingpin "gopkg.in/alecthomas/kingpin.v2" | ||
23 | ) | 26 | ) |
24 | 27 | ||
const (
	// perfSubsystem is the name under which this collector is registered
	// (see init).
	perfSubsystem = "perf"
)
28 | 31 | ||
var (
	// perfCPUsFlag holds the value of the --collector.perf.cpus command line
	// flag. When it is unset or empty, NewPerfCollector creates profilers for
	// every CPU reported by runtime.NumCPU; otherwise the value is parsed by
	// perfCPUFlagToCPUs into the list of CPUs to profile.
	perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String()
)
35 | |||
// init registers NewPerfCollector as the constructor for the "perf"
// collector, passing defaultDisabled as its default state.
func init() {
	registerCollector(perfSubsystem, defaultDisabled, NewPerfCollector)
}
@@ -36,40 +43,123 @@ func init() { | |||
36 | // settings not all profiler values may be exposed on the target system at any | 43 | // settings not all profiler values may be exposed on the target system at any |
37 | // given time. | 44 | // given time. |
type perfCollector struct {
	// hwProfilerCPUMap, swProfilerCPUMap and cacheProfilerCPUMap are reverse
	// lookups from a profiler pointer to the CPU id it was created for; the
	// update* methods read them to recover the CPU id of each profiler.
	// NOTE(review): the constructor code visible in this change never inserts
	// into these maps, in which case every lookup yields 0 — confirm they are
	// populated wherever the profilers are created.
	hwProfilerCPUMap    map[*perf.HardwareProfiler]int
	swProfilerCPUMap    map[*perf.SoftwareProfiler]int
	cacheProfilerCPUMap map[*perf.CacheProfiler]int
	// perfHwProfilers, perfSwProfilers and perfCacheProfilers hold the
	// started profilers, keyed by the CPU id each one was created for.
	perfHwProfilers    map[int]*perf.HardwareProfiler
	perfSwProfilers    map[int]*perf.SoftwareProfiler
	perfCacheProfilers map[int]*perf.CacheProfiler
	// desc maps a metric name (for example "cpucycles_total") to its
	// Prometheus descriptor.
	desc map[string]*prometheus.Desc
	// logger is the logger handed to NewPerfCollector.
	logger log.Logger
}
55 | |||
// perfCPUFlagToCPUs parses the value of the --collector.perf.cpus flag into
// the list of CPU ids the perf collectors should monitor.
//
// cpuFlag is a comma-separated list whose elements are one of:
//
//	N        a single CPU, e.g. "3"
//	A-B      an inclusive range, e.g. "0-3" yields 0,1,2,3
//	A-B:S    an inclusive range walked with stride S, e.g. "1-10:5" yields 1,6
//
// CPUs are returned in the order they appear in the flag; duplicates are not
// removed, and a range whose start is greater than its end contributes
// nothing.
//
// An error is returned when an element is not a valid integer or range, when
// an element carries more than one stride, or when the stride is not a
// positive integer.
func perfCPUFlagToCPUs(cpuFlag string) ([]int, error) {
	cpus := []int{}
	for _, subset := range strings.Split(cpuFlag, ",") {
		// An element without a dash is a single CPU id.
		if !strings.Contains(subset, "-") {
			cpu, err := strconv.Atoi(subset)
			if err != nil {
				return nil, fmt.Errorf("invalid CPU %q in flag value %q: %w", subset, cpuFlag, err)
			}
			cpus = append(cpus, cpu)
			continue
		}

		// Split off the optional ":stride" suffix.
		stride := 1
		strideSet := strings.Split(subset, ":")
		switch len(strideSet) {
		case 1:
			// No stride given; walk the range one CPU at a time.
		case 2:
			var err error
			stride, err = strconv.Atoi(strideSet[1])
			if err != nil {
				return nil, fmt.Errorf("invalid stride %q in flag value %q: %w", strideSet[1], cpuFlag, err)
			}
			// A zero or negative stride would keep the range loop below from
			// ever reaching its end, so reject it up front.
			if stride < 1 {
				return nil, fmt.Errorf("invalid stride %d in flag value %q: must be positive", stride, cpuFlag)
			}
		default:
			return nil, fmt.Errorf("invalid flag value %q", cpuFlag)
		}

		rangeSet := strings.Split(strideSet[0], "-")
		if len(rangeSet) != 2 {
			return nil, fmt.Errorf("invalid flag value %q", cpuFlag)
		}
		start, err := strconv.Atoi(rangeSet[0])
		if err != nil {
			return nil, fmt.Errorf("invalid range start %q in flag value %q: %w", rangeSet[0], cpuFlag, err)
		}
		end, err := strconv.Atoi(rangeSet[1])
		if err != nil {
			return nil, fmt.Errorf("invalid range end %q in flag value %q: %w", rangeSet[1], cpuFlag, err)
		}
		for i := start; i <= end; i += stride {
			cpus = append(cpus, i)
		}
	}

	return cpus, nil
}
45 | 100 | ||
46 | // NewPerfCollector returns a new perf based collector, it creates a profiler | 101 | // NewPerfCollector returns a new perf based collector, it creates a profiler |
47 | // per CPU. | 102 | // per CPU. |
48 | func NewPerfCollector(logger log.Logger) (Collector, error) { | 103 | func NewPerfCollector(logger log.Logger) (Collector, error) { |
49 | c := &perfCollector{ | 104 | collector := &perfCollector{ |
50 | perfHwProfilers: map[int]perf.HardwareProfiler{}, | 105 | perfHwProfilers: map[int]*perf.HardwareProfiler{}, |
51 | perfSwProfilers: map[int]perf.SoftwareProfiler{}, | 106 | perfSwProfilers: map[int]*perf.SoftwareProfiler{}, |
52 | perfCacheProfilers: map[int]perf.CacheProfiler{}, | 107 | perfCacheProfilers: map[int]*perf.CacheProfiler{}, |
53 | logger: logger, | 108 | hwProfilerCPUMap: map[*perf.HardwareProfiler]int{}, |
109 | swProfilerCPUMap: map[*perf.SoftwareProfiler]int{}, | ||
110 | cacheProfilerCPUMap: map[*perf.CacheProfiler]int{}, | ||
111 | logger: logger, | ||
54 | } | 112 | } |
55 | ncpus := runtime.NumCPU() | 113 | |
56 | for i := 0; i < ncpus; i++ { | 114 | if perfCPUsFlag != nil && *perfCPUsFlag != "" { |
57 | // Use -1 to profile all processes on the CPU, see: | 115 | cpus, err := perfCPUFlagToCPUs(*perfCPUsFlag) |
58 | // man perf_event_open | 116 | if err != nil { |
59 | c.perfHwProfilers[i] = perf.NewHardwareProfiler(-1, i) | 117 | return nil, err |
60 | if err := c.perfHwProfilers[i].Start(); err != nil { | 118 | } |
61 | return c, err | 119 | for _, cpu := range cpus { |
62 | } | 120 | // Use -1 to profile all processes on the CPU, see: |
63 | c.perfSwProfilers[i] = perf.NewSoftwareProfiler(-1, i) | 121 | // man perf_event_open |
64 | if err := c.perfSwProfilers[i].Start(); err != nil { | 122 | hwProf := perf.NewHardwareProfiler(-1, cpu) |
65 | return c, err | 123 | if err := hwProf.Start(); err != nil { |
66 | } | 124 | return nil, err |
67 | c.perfCacheProfilers[i] = perf.NewCacheProfiler(-1, i) | 125 | } |
68 | if err := c.perfCacheProfilers[i].Start(); err != nil { | 126 | collector.perfHwProfilers[cpu] = &hwProf |
69 | return c, err | 127 | |
128 | swProf := perf.NewSoftwareProfiler(-1, cpu) | ||
129 | if err := swProf.Start(); err != nil { | ||
130 | return nil, err | ||
131 | } | ||
132 | collector.perfSwProfilers[cpu] = &swProf | ||
133 | |||
134 | cacheProf := perf.NewCacheProfiler(-1, cpu) | ||
135 | if err := cacheProf.Start(); err != nil { | ||
136 | return nil, err | ||
137 | } | ||
138 | collector.perfCacheProfilers[cpu] = &cacheProf | ||
139 | } | ||
140 | } else { | ||
141 | for i := 0; i < runtime.NumCPU(); i++ { | ||
142 | hwProf := perf.NewHardwareProfiler(-1, i) | ||
143 | if err := hwProf.Start(); err != nil { | ||
144 | return nil, err | ||
145 | } | ||
146 | collector.perfHwProfilers[i] = &hwProf | ||
147 | |||
148 | swProf := perf.NewSoftwareProfiler(-1, i) | ||
149 | if err := swProf.Start(); err != nil { | ||
150 | return nil, err | ||
151 | } | ||
152 | collector.perfSwProfilers[i] = &swProf | ||
153 | |||
154 | cacheProf := perf.NewCacheProfiler(-1, i) | ||
155 | if err := cacheProf.Start(); err != nil { | ||
156 | return nil, err | ||
157 | } | ||
158 | collector.perfCacheProfilers[i] = &cacheProf | ||
70 | } | 159 | } |
71 | } | 160 | } |
72 | c.desc = map[string]*prometheus.Desc{ | 161 | |
162 | collector.desc = map[string]*prometheus.Desc{ | ||
73 | "cpucycles_total": prometheus.NewDesc( | 163 | "cpucycles_total": prometheus.NewDesc( |
74 | prometheus.BuildFQName( | 164 | prometheus.BuildFQName( |
75 | namespace, | 165 | namespace, |
@@ -312,7 +402,7 @@ func NewPerfCollector(logger log.Logger) (Collector, error) { | |||
312 | ), | 402 | ), |
313 | } | 403 | } |
314 | 404 | ||
315 | return c, nil | 405 | return collector, nil |
316 | } | 406 | } |
317 | 407 | ||
318 | // Update implements the Collector interface and will collect metrics per CPU. | 408 | // Update implements the Collector interface and will collect metrics per CPU. |
@@ -333,9 +423,10 @@ func (c *perfCollector) Update(ch chan<- prometheus.Metric) error { | |||
333 | } | 423 | } |
334 | 424 | ||
335 | func (c *perfCollector) updateHardwareStats(ch chan<- prometheus.Metric) error { | 425 | func (c *perfCollector) updateHardwareStats(ch chan<- prometheus.Metric) error { |
336 | for cpu, profiler := range c.perfHwProfilers { | 426 | for _, profiler := range c.perfHwProfilers { |
337 | cpuStr := strconv.Itoa(cpu) | 427 | cpuid := c.hwProfilerCPUMap[profiler] |
338 | hwProfile, err := profiler.Profile() | 428 | cpuStr := fmt.Sprintf("%d", cpuid) |
429 | hwProfile, err := (*profiler).Profile() | ||
339 | if err != nil { | 430 | if err != nil { |
340 | return err | 431 | return err |
341 | } | 432 | } |
@@ -404,9 +495,10 @@ func (c *perfCollector) updateHardwareStats(ch chan<- prometheus.Metric) error { | |||
404 | } | 495 | } |
405 | 496 | ||
406 | func (c *perfCollector) updateSoftwareStats(ch chan<- prometheus.Metric) error { | 497 | func (c *perfCollector) updateSoftwareStats(ch chan<- prometheus.Metric) error { |
407 | for cpu, profiler := range c.perfSwProfilers { | 498 | for _, profiler := range c.perfSwProfilers { |
408 | cpuStr := strconv.Itoa(cpu) | 499 | cpuid := c.swProfilerCPUMap[profiler] |
409 | swProfile, err := profiler.Profile() | 500 | cpuStr := fmt.Sprintf("%d", cpuid) |
501 | swProfile, err := (*profiler).Profile() | ||
410 | if err != nil { | 502 | if err != nil { |
411 | return err | 503 | return err |
412 | } | 504 | } |
@@ -459,9 +551,10 @@ func (c *perfCollector) updateSoftwareStats(ch chan<- prometheus.Metric) error { | |||
459 | } | 551 | } |
460 | 552 | ||
461 | func (c *perfCollector) updateCacheStats(ch chan<- prometheus.Metric) error { | 553 | func (c *perfCollector) updateCacheStats(ch chan<- prometheus.Metric) error { |
462 | for cpu, profiler := range c.perfCacheProfilers { | 554 | for _, profiler := range c.perfCacheProfilers { |
463 | cpuStr := strconv.Itoa(cpu) | 555 | cpuid := c.cacheProfilerCPUMap[profiler] |
464 | cacheProfile, err := profiler.Profile() | 556 | cpuStr := fmt.Sprintf("%d", cpuid) |
557 | cacheProfile, err := (*profiler).Profile() | ||
465 | if err != nil { | 558 | if err != nil { |
466 | return err | 559 | return err |
467 | } | 560 | } |