diff options
author | domchan <31119455+domgoer@users.noreply.github.com> | 2020-07-18 00:32:23 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-07-17 18:32:23 +0200 |
commit | 503e4fc8486c0082d6bd8c53fad646bcfafeedf6 (patch) | |
tree | 51cf53ed86b640630b69d9c45719592b71686bd8 | |
parent | f4b89c79a25fa10a397075b8bc3dd09314438fa2 (diff) | |
download | prometheus_node_collector-503e4fc8486c0082d6bd8c53fad646bcfafeedf6.tar.bz2 prometheus_node_collector-503e4fc8486c0082d6bd8c53fad646bcfafeedf6.tar.xz prometheus_node_collector-503e4fc8486c0082d6bd8c53fad646bcfafeedf6.zip |
Expose cpu bugs and flags as info metrics. (#1788)
* Expose cpu bugs and flags as info metrics with a regexp filter.
* Automatically enable CPU info metrics when using flags or bugs feature.
Signed-off-by: domgoer <domdoumc@gmail.com>
-rw-r--r-- | collector/cpu_linux.go | 74 | ||||
-rw-r--r-- | collector/fixtures/e2e-64k-page-output.txt | 12 | ||||
-rw-r--r-- | collector/fixtures/e2e-output.txt | 12 | ||||
-rwxr-xr-x | end-to-end-test.sh | 2 |
4 files changed, 98 insertions, 2 deletions
diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index dfa4d4a..65476d3 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go | |||
@@ -18,6 +18,7 @@ package collector | |||
18 | import ( | 18 | import ( |
19 | "fmt" | 19 | "fmt" |
20 | "path/filepath" | 20 | "path/filepath" |
21 | "regexp" | ||
21 | "strconv" | 22 | "strconv" |
22 | "sync" | 23 | "sync" |
23 | 24 | ||
@@ -32,16 +33,23 @@ type cpuCollector struct { | |||
32 | fs procfs.FS | 33 | fs procfs.FS |
33 | cpu *prometheus.Desc | 34 | cpu *prometheus.Desc |
34 | cpuInfo *prometheus.Desc | 35 | cpuInfo *prometheus.Desc |
36 | cpuFlagsInfo *prometheus.Desc | ||
37 | cpuBugsInfo *prometheus.Desc | ||
35 | cpuGuest *prometheus.Desc | 38 | cpuGuest *prometheus.Desc |
36 | cpuCoreThrottle *prometheus.Desc | 39 | cpuCoreThrottle *prometheus.Desc |
37 | cpuPackageThrottle *prometheus.Desc | 40 | cpuPackageThrottle *prometheus.Desc |
38 | logger log.Logger | 41 | logger log.Logger |
39 | cpuStats []procfs.CPUStat | 42 | cpuStats []procfs.CPUStat |
40 | cpuStatsMutex sync.Mutex | 43 | cpuStatsMutex sync.Mutex |
44 | |||
45 | cpuFlagsIncludeRegexp *regexp.Regexp | ||
46 | cpuBugsIncludeRegexp *regexp.Regexp | ||
41 | } | 47 | } |
42 | 48 | ||
var (
	// enableCPUInfo holds the value of --collector.cpu.info.
	enableCPUInfo = kingpin.Flag("collector.cpu.info", "Enables metric cpu_info").Bool()
	// flagsInclude holds the raw --collector.cpu.info.flags-include pattern;
	// it is passed to compileIncludeFlags by NewCPUCollector.
	flagsInclude = kingpin.Flag("collector.cpu.info.flags-include", "Filter the `flags` field in cpuInfo with a value that must be a regular expression").String()
	// bugsInclude holds the raw --collector.cpu.info.bugs-include pattern;
	// it is passed to compileIncludeFlags by NewCPUCollector.
	bugsInclude = kingpin.Flag("collector.cpu.info.bugs-include", "Filter the `bugs` field in cpuInfo with a value that must be a regular expression").String()
)
46 | 54 | ||
47 | func init() { | 55 | func init() { |
@@ -54,7 +62,7 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { | |||
54 | if err != nil { | 62 | if err != nil { |
55 | return nil, fmt.Errorf("failed to open procfs: %w", err) | 63 | return nil, fmt.Errorf("failed to open procfs: %w", err) |
56 | } | 64 | } |
57 | return &cpuCollector{ | 65 | c := &cpuCollector{ |
58 | fs: fs, | 66 | fs: fs, |
59 | cpu: nodeCPUSecondsDesc, | 67 | cpu: nodeCPUSecondsDesc, |
60 | cpuInfo: prometheus.NewDesc( | 68 | cpuInfo: prometheus.NewDesc( |
@@ -62,6 +70,16 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { | |||
62 | "CPU information from /proc/cpuinfo.", | 70 | "CPU information from /proc/cpuinfo.", |
63 | []string{"package", "core", "cpu", "vendor", "family", "model", "model_name", "microcode", "stepping", "cachesize"}, nil, | 71 | []string{"package", "core", "cpu", "vendor", "family", "model", "model_name", "microcode", "stepping", "cachesize"}, nil, |
64 | ), | 72 | ), |
73 | cpuFlagsInfo: prometheus.NewDesc( | ||
74 | prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "flag_info"), | ||
75 | "The `flags` field of CPU information from /proc/cpuinfo.", | ||
76 | []string{"flag"}, nil, | ||
77 | ), | ||
78 | cpuBugsInfo: prometheus.NewDesc( | ||
79 | prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "bug_info"), | ||
80 | "The `bugs` field of CPU information from /proc/cpuinfo.", | ||
81 | []string{"bug"}, nil, | ||
82 | ), | ||
65 | cpuGuest: prometheus.NewDesc( | 83 | cpuGuest: prometheus.NewDesc( |
66 | prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"), | 84 | prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"), |
67 | "Seconds the cpus spent in guests (VMs) for each mode.", | 85 | "Seconds the cpus spent in guests (VMs) for each mode.", |
@@ -78,7 +96,34 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { | |||
78 | []string{"package"}, nil, | 96 | []string{"package"}, nil, |
79 | ), | 97 | ), |
80 | logger: logger, | 98 | logger: logger, |
81 | }, nil | 99 | } |
100 | err = c.compileIncludeFlags(flagsInclude, bugsInclude) | ||
101 | if err != nil { | ||
102 | return nil, fmt.Errorf("fail to compile --collector.cpu.info.flags-include and --collector.cpu.info.bugs-include, the values of them must be regular expressions: %w", err) | ||
103 | } | ||
104 | return c, nil | ||
105 | } | ||
106 | |||
107 | func (c *cpuCollector) compileIncludeFlags(flagsIncludeFlag, bugsIncludeFlag *string) error { | ||
108 | if (*flagsIncludeFlag != "" || *bugsIncludeFlag != "") && !*enableCPUInfo { | ||
109 | *enableCPUInfo = true | ||
110 | level.Info(c.logger).Log("msg", "--collector.cpu.info has been set to `true` because you set the following flags, like --collector.cpu.info.flags-include and --collector.cpu.info.bugs-include") | ||
111 | } | ||
112 | |||
113 | var err error | ||
114 | if *flagsIncludeFlag != "" { | ||
115 | c.cpuFlagsIncludeRegexp, err = regexp.Compile(*flagsIncludeFlag) | ||
116 | if err != nil { | ||
117 | return err | ||
118 | } | ||
119 | } | ||
120 | if *bugsIncludeFlag != "" { | ||
121 | c.cpuBugsIncludeRegexp, err = regexp.Compile(*bugsIncludeFlag) | ||
122 | if err != nil { | ||
123 | return err | ||
124 | } | ||
125 | } | ||
126 | return nil | ||
82 | } | 127 | } |
83 | 128 | ||
84 | // Update implements Collector and exposes cpu related metrics from /proc/stat and /sys/.../cpu/. | 129 | // Update implements Collector and exposes cpu related metrics from /proc/stat and /sys/.../cpu/. |
@@ -117,6 +162,31 @@ func (c *cpuCollector) updateInfo(ch chan<- prometheus.Metric) error { | |||
117 | cpu.Microcode, | 162 | cpu.Microcode, |
118 | cpu.Stepping, | 163 | cpu.Stepping, |
119 | cpu.CacheSize) | 164 | cpu.CacheSize) |
165 | |||
166 | if err := updateFieldInfo(cpu.Flags, c.cpuFlagsIncludeRegexp, c.cpuFlagsInfo, ch); err != nil { | ||
167 | return err | ||
168 | } | ||
169 | if err := updateFieldInfo(cpu.Bugs, c.cpuBugsIncludeRegexp, c.cpuBugsInfo, ch); err != nil { | ||
170 | return err | ||
171 | } | ||
172 | } | ||
173 | return nil | ||
174 | } | ||
175 | |||
176 | func updateFieldInfo(valueList []string, filter *regexp.Regexp, desc *prometheus.Desc, ch chan<- prometheus.Metric) error { | ||
177 | if filter == nil { | ||
178 | return nil | ||
179 | } | ||
180 | |||
181 | for _, val := range valueList { | ||
182 | if !filter.MatchString(val) { | ||
183 | continue | ||
184 | } | ||
185 | ch <- prometheus.MustNewConstMetric(desc, | ||
186 | prometheus.GaugeValue, | ||
187 | 1, | ||
188 | val, | ||
189 | ) | ||
120 | } | 190 | } |
121 | return nil | 191 | return nil |
122 | } | 192 | } |
diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 7b857ff..cbed127 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt | |||
@@ -184,12 +184,24 @@ node_cooling_device_cur_state{name="0",type="Processor"} 0 | |||
184 | # HELP node_cooling_device_max_state Maximum throttle state of the cooling device | 184 | # HELP node_cooling_device_max_state Maximum throttle state of the cooling device |
185 | # TYPE node_cooling_device_max_state gauge | 185 | # TYPE node_cooling_device_max_state gauge |
186 | node_cooling_device_max_state{name="0",type="Processor"} 3 | 186 | node_cooling_device_max_state{name="0",type="Processor"} 3 |
187 | # HELP node_cpu_bug_info The `bugs` field of CPU information from /proc/cpuinfo. | ||
188 | # TYPE node_cpu_bug_info gauge | ||
189 | node_cpu_bug_info{bug="cpu_meltdown"} 1 | ||
190 | node_cpu_bug_info{bug="mds"} 1 | ||
191 | node_cpu_bug_info{bug="spectre_v1"} 1 | ||
192 | node_cpu_bug_info{bug="spectre_v2"} 1 | ||
187 | # HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled. | 193 | # HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled. |
188 | # TYPE node_cpu_core_throttles_total counter | 194 | # TYPE node_cpu_core_throttles_total counter |
189 | node_cpu_core_throttles_total{core="0",package="0"} 5 | 195 | node_cpu_core_throttles_total{core="0",package="0"} 5 |
190 | node_cpu_core_throttles_total{core="0",package="1"} 0 | 196 | node_cpu_core_throttles_total{core="0",package="1"} 0 |
191 | node_cpu_core_throttles_total{core="1",package="0"} 0 | 197 | node_cpu_core_throttles_total{core="1",package="0"} 0 |
192 | node_cpu_core_throttles_total{core="1",package="1"} 9 | 198 | node_cpu_core_throttles_total{core="1",package="1"} 9 |
199 | # HELP node_cpu_flag_info The `flags` field of CPU information from /proc/cpuinfo. | ||
200 | # TYPE node_cpu_flag_info gauge | ||
201 | node_cpu_flag_info{flag="aes"} 1 | ||
202 | node_cpu_flag_info{flag="avx"} 1 | ||
203 | node_cpu_flag_info{flag="avx2"} 1 | ||
204 | node_cpu_flag_info{flag="constant_tsc"} 1 | ||
193 | # HELP node_cpu_guest_seconds_total Seconds the cpus spent in guests (VMs) for each mode. | 205 | # HELP node_cpu_guest_seconds_total Seconds the cpus spent in guests (VMs) for each mode. |
194 | # TYPE node_cpu_guest_seconds_total counter | 206 | # TYPE node_cpu_guest_seconds_total counter |
195 | node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0.01 | 207 | node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0.01 |
diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index e8a5779..02ae152 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt | |||
@@ -232,12 +232,24 @@ node_cooling_device_cur_state{name="0",type="Processor"} 0 | |||
232 | # HELP node_cooling_device_max_state Maximum throttle state of the cooling device | 232 | # HELP node_cooling_device_max_state Maximum throttle state of the cooling device |
233 | # TYPE node_cooling_device_max_state gauge | 233 | # TYPE node_cooling_device_max_state gauge |
234 | node_cooling_device_max_state{name="0",type="Processor"} 3 | 234 | node_cooling_device_max_state{name="0",type="Processor"} 3 |
235 | # HELP node_cpu_bug_info The `bugs` field of CPU information from /proc/cpuinfo. | ||
236 | # TYPE node_cpu_bug_info gauge | ||
237 | node_cpu_bug_info{bug="cpu_meltdown"} 1 | ||
238 | node_cpu_bug_info{bug="mds"} 1 | ||
239 | node_cpu_bug_info{bug="spectre_v1"} 1 | ||
240 | node_cpu_bug_info{bug="spectre_v2"} 1 | ||
235 | # HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled. | 241 | # HELP node_cpu_core_throttles_total Number of times this cpu core has been throttled. |
236 | # TYPE node_cpu_core_throttles_total counter | 242 | # TYPE node_cpu_core_throttles_total counter |
237 | node_cpu_core_throttles_total{core="0",package="0"} 5 | 243 | node_cpu_core_throttles_total{core="0",package="0"} 5 |
238 | node_cpu_core_throttles_total{core="0",package="1"} 0 | 244 | node_cpu_core_throttles_total{core="0",package="1"} 0 |
239 | node_cpu_core_throttles_total{core="1",package="0"} 0 | 245 | node_cpu_core_throttles_total{core="1",package="0"} 0 |
240 | node_cpu_core_throttles_total{core="1",package="1"} 9 | 246 | node_cpu_core_throttles_total{core="1",package="1"} 9 |
247 | # HELP node_cpu_flag_info The `flags` field of CPU information from /proc/cpuinfo. | ||
248 | # TYPE node_cpu_flag_info gauge | ||
249 | node_cpu_flag_info{flag="aes"} 1 | ||
250 | node_cpu_flag_info{flag="avx"} 1 | ||
251 | node_cpu_flag_info{flag="avx2"} 1 | ||
252 | node_cpu_flag_info{flag="constant_tsc"} 1 | ||
241 | # HELP node_cpu_guest_seconds_total Seconds the cpus spent in guests (VMs) for each mode. | 253 | # HELP node_cpu_guest_seconds_total Seconds the cpus spent in guests (VMs) for each mode. |
242 | # TYPE node_cpu_guest_seconds_total counter | 254 | # TYPE node_cpu_guest_seconds_total counter |
243 | node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0.01 | 255 | node_cpu_guest_seconds_total{cpu="0",mode="nice"} 0.01 |
diff --git a/end-to-end-test.sh b/end-to-end-test.sh index 961dd27..955ab1d 100755 --- a/end-to-end-test.sh +++ b/end-to-end-test.sh | |||
@@ -107,6 +107,8 @@ fi | |||
107 | --collector.qdisc.fixtures="collector/fixtures/qdisc/" \ | 107 | --collector.qdisc.fixtures="collector/fixtures/qdisc/" \ |
108 | --collector.netclass.ignored-devices="(bond0|dmz|int)" \ | 108 | --collector.netclass.ignored-devices="(bond0|dmz|int)" \ |
109 | --collector.cpu.info \ | 109 | --collector.cpu.info \ |
110 | --collector.cpu.info.flags-include="^(aes|avx.?|constant_tsc)$" \ | ||
111 | --collector.cpu.info.bugs-include="^(cpu_meltdown|spectre_.*|mds)$" \ | ||
110 | --web.listen-address "127.0.0.1:${port}" \ | 112 | --web.listen-address "127.0.0.1:${port}" \ |
111 | --log.level="debug" > "${tmpdir}/node_exporter.log" 2>&1 & | 113 | --log.level="debug" > "${tmpdir}/node_exporter.log" 2>&1 & |
112 | 114 | ||