aboutsummaryrefslogtreecommitdiff
path: root/collector/perf_linux.go
diff options
context:
space:
mode:
Diffstat (limited to 'collector/perf_linux.go')
-rw-r--r--collector/perf_linux.go165
1 files changed, 129 insertions, 36 deletions
diff --git a/collector/perf_linux.go b/collector/perf_linux.go
index e8a52b4..b67f970 100644
--- a/collector/perf_linux.go
+++ b/collector/perf_linux.go
@@ -14,18 +14,25 @@
14package collector 14package collector
15 15
16import ( 16import (
17 "fmt"
17 "runtime" 18 "runtime"
18 "strconv" 19 "strconv"
20 "strings"
19 21
20 "github.com/go-kit/kit/log" 22 "github.com/go-kit/kit/log"
21 "github.com/hodgesds/perf-utils" 23 "github.com/hodgesds/perf-utils"
22 "github.com/prometheus/client_golang/prometheus" 24 "github.com/prometheus/client_golang/prometheus"
25 kingpin "gopkg.in/alecthomas/kingpin.v2"
23) 26)
24 27
25const ( 28const (
26 perfSubsystem = "perf" 29 perfSubsystem = "perf"
27) 30)
28 31
32var (
33 perfCPUsFlag = kingpin.Flag("collector.perf.cpus", "List of CPUs from which perf metrics should be collected").Default("").String()
34)
35
29func init() { 36func init() {
30 registerCollector(perfSubsystem, defaultDisabled, NewPerfCollector) 37 registerCollector(perfSubsystem, defaultDisabled, NewPerfCollector)
31} 38}
@@ -36,40 +43,123 @@ func init() {
36// settings not all profiler values may be exposed on the target system at any 43// settings not all profiler values may be exposed on the target system at any
37// given time. 44// given time.
38type perfCollector struct { 45type perfCollector struct {
39 perfHwProfilers map[int]perf.HardwareProfiler 46 hwProfilerCPUMap map[*perf.HardwareProfiler]int
40 perfSwProfilers map[int]perf.SoftwareProfiler 47 swProfilerCPUMap map[*perf.SoftwareProfiler]int
41 perfCacheProfilers map[int]perf.CacheProfiler 48 cacheProfilerCPUMap map[*perf.CacheProfiler]int
42 desc map[string]*prometheus.Desc 49 perfHwProfilers map[int]*perf.HardwareProfiler
43 logger log.Logger 50 perfSwProfilers map[int]*perf.SoftwareProfiler
51 perfCacheProfilers map[int]*perf.CacheProfiler
52 desc map[string]*prometheus.Desc
53 logger log.Logger
54}
55
// perfCPUFlagToCPUs returns the CPUs for the perf collectors to monitor,
// parsed from the comma separated value of the collector.perf.cpus flag.
//
// Each comma separated element is one of:
//
//   - a single CPU, e.g. "3"
//   - an inclusive range, e.g. "0-3" (yields 0,1,2,3)
//   - an inclusive range with a stride, e.g. "1-10:5" (yields 1,6)
//
// CPUs are returned in the order in which they appear in cpuFlag. Duplicates
// are not removed, and a range whose start is greater than its end
// contributes no CPUs.
//
// An error is returned when an element is not a valid integer, when a range
// or stride specification is malformed, or when a stride is less than 1.
func perfCPUFlagToCPUs(cpuFlag string) ([]int, error) {
	cpus := []int{}
	for _, subset := range strings.Split(cpuFlag, ",") {
		// First parse a single CPU.
		if !strings.Contains(subset, "-") {
			cpu, err := strconv.Atoi(subset)
			if err != nil {
				return nil, err
			}
			cpus = append(cpus, cpu)
			continue
		}

		// Handle strides, ie 1-10:5 should yield 1,6.
		stride := 1
		strideSet := strings.Split(subset, ":")
		switch len(strideSet) {
		case 1:
			// Plain range, keep the default stride of 1.
		case 2:
			var err error
			stride, err = strconv.Atoi(strideSet[1])
			if err != nil {
				return nil, err
			}
			// A zero stride would never advance the loop below and a
			// negative one would walk away from the end of the range, so
			// both must be rejected up front.
			if stride < 1 {
				return nil, fmt.Errorf("invalid stride %d in flag value %q", stride, cpuFlag)
			}
		default:
			// More than one ':' is ambiguous; refuse it rather than
			// silently ignoring the stride.
			return nil, fmt.Errorf("invalid flag value %q", cpuFlag)
		}

		rangeSet := strings.Split(strideSet[0], "-")
		if len(rangeSet) != 2 {
			return nil, fmt.Errorf("invalid flag value %q", cpuFlag)
		}
		start, err := strconv.Atoi(rangeSet[0])
		if err != nil {
			return nil, err
		}
		end, err := strconv.Atoi(rangeSet[1])
		if err != nil {
			return nil, err
		}
		for i := start; i <= end; i += stride {
			cpus = append(cpus, i)
		}
	}

	return cpus, nil
}
45 100
46// NewPerfCollector returns a new perf based collector, it creates a profiler 101// NewPerfCollector returns a new perf based collector, it creates a profiler
47// per CPU. 102// per CPU.
48func NewPerfCollector(logger log.Logger) (Collector, error) { 103func NewPerfCollector(logger log.Logger) (Collector, error) {
49 c := &perfCollector{ 104 collector := &perfCollector{
50 perfHwProfilers: map[int]perf.HardwareProfiler{}, 105 perfHwProfilers: map[int]*perf.HardwareProfiler{},
51 perfSwProfilers: map[int]perf.SoftwareProfiler{}, 106 perfSwProfilers: map[int]*perf.SoftwareProfiler{},
52 perfCacheProfilers: map[int]perf.CacheProfiler{}, 107 perfCacheProfilers: map[int]*perf.CacheProfiler{},
53 logger: logger, 108 hwProfilerCPUMap: map[*perf.HardwareProfiler]int{},
109 swProfilerCPUMap: map[*perf.SoftwareProfiler]int{},
110 cacheProfilerCPUMap: map[*perf.CacheProfiler]int{},
111 logger: logger,
54 } 112 }
55 ncpus := runtime.NumCPU() 113
56 for i := 0; i < ncpus; i++ { 114 if perfCPUsFlag != nil && *perfCPUsFlag != "" {
57 // Use -1 to profile all processes on the CPU, see: 115 cpus, err := perfCPUFlagToCPUs(*perfCPUsFlag)
58 // man perf_event_open 116 if err != nil {
59 c.perfHwProfilers[i] = perf.NewHardwareProfiler(-1, i) 117 return nil, err
60 if err := c.perfHwProfilers[i].Start(); err != nil { 118 }
61 return c, err 119 for _, cpu := range cpus {
62 } 120 // Use -1 to profile all processes on the CPU, see:
63 c.perfSwProfilers[i] = perf.NewSoftwareProfiler(-1, i) 121 // man perf_event_open
64 if err := c.perfSwProfilers[i].Start(); err != nil { 122 hwProf := perf.NewHardwareProfiler(-1, cpu)
65 return c, err 123 if err := hwProf.Start(); err != nil {
66 } 124 return nil, err
67 c.perfCacheProfilers[i] = perf.NewCacheProfiler(-1, i) 125 }
68 if err := c.perfCacheProfilers[i].Start(); err != nil { 126 collector.perfHwProfilers[cpu] = &hwProf
69 return c, err 127
128 swProf := perf.NewSoftwareProfiler(-1, cpu)
129 if err := swProf.Start(); err != nil {
130 return nil, err
131 }
132 collector.perfSwProfilers[cpu] = &swProf
133
134 cacheProf := perf.NewCacheProfiler(-1, cpu)
135 if err := cacheProf.Start(); err != nil {
136 return nil, err
137 }
138 collector.perfCacheProfilers[cpu] = &cacheProf
139 }
140 } else {
141 for i := 0; i < runtime.NumCPU(); i++ {
142 hwProf := perf.NewHardwareProfiler(-1, i)
143 if err := hwProf.Start(); err != nil {
144 return nil, err
145 }
146 collector.perfHwProfilers[i] = &hwProf
147
148 swProf := perf.NewSoftwareProfiler(-1, i)
149 if err := swProf.Start(); err != nil {
150 return nil, err
151 }
152 collector.perfSwProfilers[i] = &swProf
153
154 cacheProf := perf.NewCacheProfiler(-1, i)
155 if err := cacheProf.Start(); err != nil {
156 return nil, err
157 }
158 collector.perfCacheProfilers[i] = &cacheProf
70 } 159 }
71 } 160 }
72 c.desc = map[string]*prometheus.Desc{ 161
162 collector.desc = map[string]*prometheus.Desc{
73 "cpucycles_total": prometheus.NewDesc( 163 "cpucycles_total": prometheus.NewDesc(
74 prometheus.BuildFQName( 164 prometheus.BuildFQName(
75 namespace, 165 namespace,
@@ -312,7 +402,7 @@ func NewPerfCollector(logger log.Logger) (Collector, error) {
312 ), 402 ),
313 } 403 }
314 404
315 return c, nil 405 return collector, nil
316} 406}
317 407
318// Update implements the Collector interface and will collect metrics per CPU. 408// Update implements the Collector interface and will collect metrics per CPU.
@@ -333,9 +423,10 @@ func (c *perfCollector) Update(ch chan<- prometheus.Metric) error {
333} 423}
334 424
335func (c *perfCollector) updateHardwareStats(ch chan<- prometheus.Metric) error { 425func (c *perfCollector) updateHardwareStats(ch chan<- prometheus.Metric) error {
336 for cpu, profiler := range c.perfHwProfilers { 426 for _, profiler := range c.perfHwProfilers {
337 cpuStr := strconv.Itoa(cpu) 427 cpuid := c.hwProfilerCPUMap[profiler]
338 hwProfile, err := profiler.Profile() 428 cpuStr := fmt.Sprintf("%d", cpuid)
429 hwProfile, err := (*profiler).Profile()
339 if err != nil { 430 if err != nil {
340 return err 431 return err
341 } 432 }
@@ -404,9 +495,10 @@ func (c *perfCollector) updateHardwareStats(ch chan<- prometheus.Metric) error {
404} 495}
405 496
406func (c *perfCollector) updateSoftwareStats(ch chan<- prometheus.Metric) error { 497func (c *perfCollector) updateSoftwareStats(ch chan<- prometheus.Metric) error {
407 for cpu, profiler := range c.perfSwProfilers { 498 for _, profiler := range c.perfSwProfilers {
408 cpuStr := strconv.Itoa(cpu) 499 cpuid := c.swProfilerCPUMap[profiler]
409 swProfile, err := profiler.Profile() 500 cpuStr := fmt.Sprintf("%d", cpuid)
501 swProfile, err := (*profiler).Profile()
410 if err != nil { 502 if err != nil {
411 return err 503 return err
412 } 504 }
@@ -459,9 +551,10 @@ func (c *perfCollector) updateSoftwareStats(ch chan<- prometheus.Metric) error {
459} 551}
460 552
461func (c *perfCollector) updateCacheStats(ch chan<- prometheus.Metric) error { 553func (c *perfCollector) updateCacheStats(ch chan<- prometheus.Metric) error {
462 for cpu, profiler := range c.perfCacheProfilers { 554 for _, profiler := range c.perfCacheProfilers {
463 cpuStr := strconv.Itoa(cpu) 555 cpuid := c.cacheProfilerCPUMap[profiler]
464 cacheProfile, err := profiler.Profile() 556 cpuStr := fmt.Sprintf("%d", cpuid)
557 cacheProfile, err := (*profiler).Profile()
465 if err != nil { 558 if err != nil {
466 return err 559 return err
467 } 560 }