about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author domchan <31119455+domgoer@users.noreply.github.com> 2020-07-18 00:32:23 +0800
committer GitHub <noreply@github.com> 2020-07-17 18:32:23 +0200
commit 503e4fc8486c0082d6bd8c53fad646bcfafeedf6 (patch)
tree 51cf53ed86b640630b69d9c45719592b71686bd8
parent f4b89c79a25fa10a397075b8bc3dd09314438fa2 (diff)
download prometheus_node_collector-503e4fc8486c0082d6bd8c53fad646bcfafeedf6.tar.bz2
prometheus_node_collector-503e4fc8486c0082d6bd8c53fad646bcfafeedf6.tar.xz
prometheus_node_collector-503e4fc8486c0082d6bd8c53fad646bcfafeedf6.zip
Expose cpu bugs and flags as info metrics. (#1788)
* Expose cpu bugs and flags as info metrics with a regexp filter.
* Automatically enable CPU info metrics when using flags or bugs feature.

Signed-off-by: domgoer <domdoumc@gmail.com>
-rw-r--r-- collector/cpu_linux.go 74
-rw-r--r-- collector/fixtures/e2e-64k-page-output.txt 12
-rw-r--r-- collector/fixtures/e2e-output.txt 12
-rwxr-xr-x end-to-end-test.sh 2
4 files changed, 98 insertions, 2 deletions
diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go
index dfa4d4a..65476d3 100644
--- a/collector/cpu_linux.go
+++ b/collector/cpu_linux.go
@@ -18,6 +18,7 @@ package collector
18import ( 18import (
19 "fmt" 19 "fmt"
20 "path/filepath" 20 "path/filepath"
21 "regexp"
21 "strconv" 22 "strconv"
22 "sync" 23 "sync"
23 24
@@ -32,16 +33,23 @@ type cpuCollector struct {
32 fs procfs.FS 33 fs procfs.FS
33 cpu *prometheus.Desc 34 cpu *prometheus.Desc
34 cpuInfo *prometheus.Desc 35 cpuInfo *prometheus.Desc
36 cpuFlagsInfo *prometheus.Desc
37 cpuBugsInfo *prometheus.Desc
35 cpuGuest *prometheus.Desc 38 cpuGuest *prometheus.Desc
36 cpuCoreThrottle *prometheus.Desc 39 cpuCoreThrottle *prometheus.Desc
37 cpuPackageThrottle *prometheus.Desc 40 cpuPackageThrottle *prometheus.Desc
38 logger log.Logger 41 logger log.Logger
39 cpuStats []procfs.CPUStat 42 cpuStats []procfs.CPUStat
40 cpuStatsMutex sync.Mutex 43 cpuStatsMutex sync.Mutex
44
45 cpuFlagsIncludeRegexp *regexp.Regexp
46 cpuBugsIncludeRegexp *regexp.Regexp
41} 47}
42 48
43var ( 49var (
44 enableCPUInfo = kingpin.Flag("collector.cpu.info", "Enables metric cpu_info").Bool() 50 enableCPUInfo = kingpin.Flag("collector.cpu.info", "Enables metric cpu_info").Bool()
51 flagsInclude = kingpin.Flag("collector.cpu.info.flags-include", "Filter the `flags` field in cpuInfo with a value that must be a regular expression").String()
52 bugsInclude = kingpin.Flag("collector.cpu.info.bugs-include", "Filter the `bugs` field in cpuInfo with a value that must be a regular expression").String()
45) 53)
46 54
47func init() { 55func init() {
@@ -54,7 +62,7 @@ func NewCPUCollector(logger log.Logger) (Collector, error) {
54 if err != nil { 62 if err != nil {
55 return nil, fmt.Errorf("failed to open procfs: %w", err) 63 return nil, fmt.Errorf("failed to open procfs: %w", err)
56 } 64 }
57 return &cpuCollector{ 65 c := &cpuCollector{
58 fs: fs, 66 fs: fs,
59 cpu: nodeCPUSecondsDesc, 67 cpu: nodeCPUSecondsDesc,
60 cpuInfo: prometheus.NewDesc( 68 cpuInfo: prometheus.NewDesc(
@@ -62,6 +70,16 @@ func NewCPUCollector(logger log.Logger) (Collector, error) {
62 "CPU information from /proc/cpuinfo.", 70 "CPU information from /proc/cpuinfo.",
63 []string{"package", "core", "cpu", "vendor", "family", "model", "model_name", "microcode", "stepping", "cachesize"}, nil, 71 []string{"package", "core", "cpu", "vendor", "family", "model", "model_name", "microcode", "stepping", "cachesize"}, nil,
64 ), 72 ),
73 cpuFlagsInfo: prometheus.NewDesc(
74 prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "flag_info"),
75 "The `flags` field of CPU information from /proc/cpuinfo.",
76 []string{"flag"}, nil,
77 ),
78 cpuBugsInfo: prometheus.NewDesc(
79 prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "bug_info"),
80 "The `bugs` field of CPU information from /proc/cpuinfo.",
81 []string{"bug"}, nil,
82 ),
65 cpuGuest: prometheus.NewDesc( 83 cpuGuest: prometheus.NewDesc(
66 prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"), 84 prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"),
67 "Seconds the cpus spent in guests (VMs) for each mode.", 85 "Seconds the cpus spent in guests (VMs) for each mode.",
@@ -78,7 +96,34 @@ func NewCPUCollector(logger log.Logger) (Collector, error) {
78 []string{"package"}, nil, 96 []string{"package"}, nil,
79 ), 97 ),
80 logger: logger, 98 logger: logger,
81 }, nil 99 }
100 err = c.compileIncludeFlags(flagsInclude, bugsInclude)
101 if err != nil {
102 return nil, fmt.Errorf("fail to compile --collector.cpu.info.flags-include and --collector.cpu.info.bugs-include, the values of them must be regular expressions: %w", err)
103 }
104 return c, nil
105}
106
107func (c *cpuCollector) compileIncludeFlags(flagsIncludeFlag, bugsIncludeFlag *string) error {
108 if (*flagsIncludeFlag != "" || *bugsIncludeFlag != "") && !*enableCPUInfo {
109 *enableCPUInfo = true
110 level.Info(c.logger).Log("msg", "--collector.cpu.info has been set to `true` because you set the following flags, like --collector.cpu.info.flags-include and --collector.cpu.info.bugs-include")
111 }
112
113 var err error
114 if *flagsIncludeFlag != "" {
115 c.cpuFlagsIncludeRegexp, err = regexp.Compile(*flagsIncludeFlag)
116 if err != nil {
117 return err
118 }
119 }
120 if *bugsIncludeFlag != "" {
121 c.cpuBugsIncludeRegexp, err = regexp.Compile(*bugsIncludeFlag)
122 if err != nil {
123 return err
124 }
125 }
126 return nil
82} 127}
83 128
84// Update implements Collector and exposes cpu related metrics from /proc/stat and /sys/.../cpu/. 129// Update implements Collector and exposes cpu related metrics from /proc/stat and /sys/.../cpu/.
@@ -117,6 +162,31 @@ func (c *cpuCollector) updateInfo(ch chan<- prometheus.Metric) error {
117 cpu.Microcode, 162 cpu.Microcode,
118 cpu.Stepping, 163 cpu.Stepping,
119 cpu.CacheSize) 164 cpu.CacheSize)
165
166 if err := updateFieldInfo(cpu.Flags, c.cpuFlagsIncludeRegexp, c.cpuFlagsInfo, ch); err != nil {
167 return err
168 }
169 if err := updateFieldInfo(cpu.Bugs, c.cpuBugsIncludeRegexp, c.cpuBugsInfo, ch); err != nil {
170 return err
171 }
172 }
173 return nil
174}
175
176func updateFieldInfo(valueList []string, filter *regexp.Regexp, desc *prometheus.Desc, ch chan<- prometheus.Metric) error {
177 if filter == nil {
178 return nil
179 }
180
181 for _, val := range valueList {
182 if !filter.MatchString(val) {
183 continue
184 }
185 ch <- prometheus.MustNewConstMetric(desc,
186 prometheus.GaugeValue,
187 1,
188 val,
189 )
120 } 190 }
121 return nil 191 return nil
122} 192}
diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt
index 7b857ff..cbed127 100644
--- a/collector/fixtures/e2e-64k-page-output.txt
+++ b/collector/fixtures/e2e-64k-page-output.txt
@@ -184,12 +184,24 @@ node_cooling_device_cur_state{name="0",type="Processor"} 0
184# HELP node_cooling_device_max_state Maximum throttle state of the cooling device 184# HELP node_cooling_device_max_state Maximum throttle state of the cooling device
185# TYPE node_cooling_device_max_state gauge 185# TYPE node_cooling_device_max_state gauge
186node_cooling_device_max_state{name="0",type="Processor"} 3 186node_cooling_device_max_state{name="0",type="Processor"} 3
187# HELP node_cpu_bug_info The `bugs` field of CPU information from /proc/cpuinfo.
188# TYPE node_cpu_bug_info gauge
189node_cpu_bug_info{bug="cpu_meltdown"} 1
190node_cpu_bug_info{bug="mds"} 1
191node_cpu_bug_info{bug="spectre_v1"} 1
192node_cpu_bug_info{bug="spectre_v2"} 1
187# HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled. 193# HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled.
188# TYPE node_cpu_core_throttles_total counter 194# TYPE node_cpu_core_throttles_total counter
189node_cpu_core_throttles_total{core="0",package="0"} 5 195node_cpu_core_throttles_total{core="0",package="0"} 5
190node_cpu_core_throttles_total{core="0",package="1"} 0 196node_cpu_core_throttles_total{core="0",package="1"} 0
191node_cpu_core_throttles_total{core="1",package="0"} 0 197node_cpu_core_throttles_total{core="1",package="0"} 0
192node_cpu_core_throttles_total{core="1",package="1"} 9 198node_cpu_core_throttles_total{core="1",package="1"} 9
199# HELP node_cpu_flag_info The `flags` field of CPU information from /proc/cpuinfo.
200# TYPE node_cpu_flag_info gauge
201node_cpu_flag_info{flag="aes"} 1
202node_cpu_flag_info{flag="avx"} 1
203node_cpu_flag_info{flag="avx2"} 1
204node_cpu_flag_info{flag="constant_tsc"} 1
193# HELP node_cpu_guest_seconds_total Seconds the cpus spent in guests (VMs) for each mode. 205# HELP node_cpu_guest_seconds_total Seconds the cpus spent in guests (VMs) for each mode.
194# TYPE node_cpu_guest_seconds_total counter 206# TYPE node_cpu_guest_seconds_total counter
195node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0.01 207node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0.01
diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt
index e8a5779..02ae152 100644
--- a/collector/fixtures/e2e-output.txt
+++ b/collector/fixtures/e2e-output.txt
@@ -232,12 +232,24 @@ node_cooling_device_cur_state{name="0",type="Processor"} 0
232# HELP node_cooling_device_max_state Maximum throttle state of the cooling device 232# HELP node_cooling_device_max_state Maximum throttle state of the cooling device
233# TYPE node_cooling_device_max_state gauge 233# TYPE node_cooling_device_max_state gauge
234node_cooling_device_max_state{name="0",type="Processor"} 3 234node_cooling_device_max_state{name="0",type="Processor"} 3
235# HELP node_cpu_bug_info The `bugs` field of CPU information from /proc/cpuinfo.
236# TYPE node_cpu_bug_info gauge
237node_cpu_bug_info{bug="cpu_meltdown"} 1
238node_cpu_bug_info{bug="mds"} 1
239node_cpu_bug_info{bug="spectre_v1"} 1
240node_cpu_bug_info{bug="spectre_v2"} 1
235# HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled. 241# HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled.
236# TYPE node_cpu_core_throttles_total counter 242# TYPE node_cpu_core_throttles_total counter
237node_cpu_core_throttles_total{core="0",package="0"} 5 243node_cpu_core_throttles_total{core="0",package="0"} 5
238node_cpu_core_throttles_total{core="0",package="1"} 0 244node_cpu_core_throttles_total{core="0",package="1"} 0
239node_cpu_core_throttles_total{core="1",package="0"} 0 245node_cpu_core_throttles_total{core="1",package="0"} 0
240node_cpu_core_throttles_total{core="1",package="1"} 9 246node_cpu_core_throttles_total{core="1",package="1"} 9
247# HELP node_cpu_flag_info The `flags` field of CPU information from /proc/cpuinfo.
248# TYPE node_cpu_flag_info gauge
249node_cpu_flag_info{flag="aes"} 1
250node_cpu_flag_info{flag="avx"} 1
251node_cpu_flag_info{flag="avx2"} 1
252node_cpu_flag_info{flag="constant_tsc"} 1
241# HELP node_cpu_guest_seconds_total Seconds the cpus spent in guests (VMs) for each mode. 253# HELP node_cpu_guest_seconds_total Seconds the cpus spent in guests (VMs) for each mode.
242# TYPE node_cpu_guest_seconds_total counter 254# TYPE node_cpu_guest_seconds_total counter
243node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0.01 255node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0.01
diff --git a/end-to-end-test.sh b/end-to-end-test.sh
index 961dd27..955ab1d 100755
--- a/end-to-end-test.sh
+++ b/end-to-end-test.sh
@@ -107,6 +107,8 @@ fi
107 --collector.qdisc.fixtures="collector/fixtures/qdisc/" \ 107 --collector.qdisc.fixtures="collector/fixtures/qdisc/" \
108 --collector.netclass.ignored-devices="(bond0|dmz|int)" \ 108 --collector.netclass.ignored-devices="(bond0|dmz|int)" \
109 --collector.cpu.info \ 109 --collector.cpu.info \
110 --collector.cpu.info.flags-include="^(aes|avx.?|constant_tsc)$" \
111 --collector.cpu.info.bugs-include="^(cpu_meltdown|spectre_.*|mds)$" \
110 --web.listen-address "127.0.0.1:${port}" \ 112 --web.listen-address "127.0.0.1:${port}" \
111 --log.level="debug" > "${tmpdir}/node_exporter.log" 2>&1 & 113 --log.level="debug" > "${tmpdir}/node_exporter.log" 2>&1 &
112 114