about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Ben Kochie <superq@gmail.com> 2020-05-25 13:51:50 +0200
committer GitHub <noreply@github.com> 2020-05-25 13:51:50 +0200
commit 7e7845a29f7cb5623683d8d67a71fe64639557a3 (patch)
tree c17f3d60f0674c7794f4d9b633b1628f35c55523
parent b8847b5b3237c18aa25426fccb3d69a9bdff6ee1 (diff)
parent 3565316d7e306d60ad74248291eaabb3410f2972 (diff)
download prometheus_node_collector-7e7845a29f7cb5623683d8d67a71fe64639557a3.tar.bz2
prometheus_node_collector-7e7845a29f7cb5623683d8d67a71fe64639557a3.tar.xz
prometheus_node_collector-7e7845a29f7cb5623683d8d67a71fe64639557a3.zip
Merge pull request #1711 from prometheus/superq/cpu_cache
Linux CPU: Cache CPU metrics
-rw-r--r-- .circleci/config.yml 2
-rw-r--r-- CHANGELOG.md 2
-rw-r--r-- collector/cpu_linux.go 85
3 files changed, 86 insertions, 3 deletions
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 19c0f1b..018b8b1 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -28,7 +28,7 @@ jobs:
28 steps: 28 steps:
29 - checkout 29 - checkout
30 - run: sudo pip install codespell 30 - run: sudo pip install codespell
31 - run: codespell --skip=".git,./vendor,ttar,go.mod,go.sum,*pem" -L uint,packages\',uptodate 31 - run: codespell --skip=".git,./vendor,ttar,go.mod,go.sum,*pem,./collector/fixtures" -L uint,packages\',uptodate
32 32
33 build: 33 build:
34 machine: 34 machine:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a5d884f..9e05d0f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
3* [CHANGE] 3* [CHANGE]
4* [FEATURE] 4* [FEATURE]
5* [ENHANCEMENT] 5* [ENHANCEMENT]
6* [BUGFIX] 6* [BUGFIX] Linux CPU: Cache CPU metrics to make them monotonically increasing #1711
7 7
8## 1.0.0-rc.1 / 2020-05-14 8## 1.0.0-rc.1 / 2020-05-14
9 9
diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go
index ae8ee53..dfa4d4a 100644
--- a/collector/cpu_linux.go
+++ b/collector/cpu_linux.go
@@ -19,6 +19,7 @@ import (
19 "fmt" 19 "fmt"
20 "path/filepath" 20 "path/filepath"
21 "strconv" 21 "strconv"
22 "sync"
22 23
23 "github.com/go-kit/kit/log" 24 "github.com/go-kit/kit/log"
24 "github.com/go-kit/kit/log/level" 25 "github.com/go-kit/kit/log/level"
@@ -35,6 +36,8 @@ type cpuCollector struct {
35 cpuCoreThrottle *prometheus.Desc 36 cpuCoreThrottle *prometheus.Desc
36 cpuPackageThrottle *prometheus.Desc 37 cpuPackageThrottle *prometheus.Desc
37 logger log.Logger 38 logger log.Logger
39 cpuStats []procfs.CPUStat
40 cpuStatsMutex sync.Mutex
38} 41}
39 42
40var ( 43var (
@@ -203,7 +206,12 @@ func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error {
203 return err 206 return err
204 } 207 }
205 208
206 for cpuID, cpuStat := range stats.CPU { 209 c.updateCPUStats(stats.CPU)
210
211 // Acquire a lock to read the stats.
212 c.cpuStatsMutex.Lock()
213 defer c.cpuStatsMutex.Unlock()
214 for cpuID, cpuStat := range c.cpuStats {
207 cpuNum := strconv.Itoa(cpuID) 215 cpuNum := strconv.Itoa(cpuID)
208 ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user") 216 ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user")
209 ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice") 217 ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice")
@@ -221,3 +229,78 @@ func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error {
221 229
222 return nil 230 return nil
223} 231}
232
233// updateCPUStats updates the internal cache of CPU stats.
234func (c *cpuCollector) updateCPUStats(newStats []procfs.CPUStat) {
235 // Acquire a lock to update the stats.
236 c.cpuStatsMutex.Lock()
237 defer c.cpuStatsMutex.Unlock()
238
239 // Reset the cache if the list of CPUs has changed.
240 if len(c.cpuStats) != len(newStats) {
241 c.cpuStats = make([]procfs.CPUStat, len(newStats))
242 }
243
244 for i, n := range newStats {
245 // If idle jumps backwards, assume we had a hotplug event and reset the stats for this CPU.
246 if n.Idle < c.cpuStats[i].Idle {
247 level.Warn(c.logger).Log("msg", "CPU Idle counter jumped backwards, possible hotplug event, resetting CPU stats", "cpu", i, "old_value", c.cpuStats[i].Idle, "new_value", n.Idle)
248 c.cpuStats[i] = procfs.CPUStat{}
249 }
250 c.cpuStats[i].Idle = n.Idle
251
252 if n.User >= c.cpuStats[i].User {
253 c.cpuStats[i].User = n.User
254 } else {
255 level.Warn(c.logger).Log("msg", "CPU User counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].User, "new_value", n.User)
256 }
257
258 if n.Nice >= c.cpuStats[i].Nice {
259 c.cpuStats[i].Nice = n.Nice
260 } else {
261 level.Warn(c.logger).Log("msg", "CPU Nice counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Nice, "new_value", n.Nice)
262 }
263
264 if n.System >= c.cpuStats[i].System {
265 c.cpuStats[i].System = n.System
266 } else {
267 level.Warn(c.logger).Log("msg", "CPU System counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].System, "new_value", n.System)
268 }
269
270 if n.Iowait >= c.cpuStats[i].Iowait {
271 c.cpuStats[i].Iowait = n.Iowait
272 } else {
273 level.Warn(c.logger).Log("msg", "CPU Iowait counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Iowait, "new_value", n.Iowait)
274 }
275
276 if n.IRQ >= c.cpuStats[i].IRQ {
277 c.cpuStats[i].IRQ = n.IRQ
278 } else {
279 level.Warn(c.logger).Log("msg", "CPU IRQ counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].IRQ, "new_value", n.IRQ)
280 }
281
282 if n.SoftIRQ >= c.cpuStats[i].SoftIRQ {
283 c.cpuStats[i].SoftIRQ = n.SoftIRQ
284 } else {
285 level.Warn(c.logger).Log("msg", "CPU SoftIRQ counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].SoftIRQ, "new_value", n.SoftIRQ)
286 }
287
288 if n.Steal >= c.cpuStats[i].Steal {
289 c.cpuStats[i].Steal = n.Steal
290 } else {
291 level.Warn(c.logger).Log("msg", "CPU Steal counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Steal, "new_value", n.Steal)
292 }
293
294 if n.Guest >= c.cpuStats[i].Guest {
295 c.cpuStats[i].Guest = n.Guest
296 } else {
297 level.Warn(c.logger).Log("msg", "CPU Guest counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].Guest, "new_value", n.Guest)
298 }
299
300 if n.GuestNice >= c.cpuStats[i].GuestNice {
301 c.cpuStats[i].GuestNice = n.GuestNice
302 } else {
303 level.Warn(c.logger).Log("msg", "CPU GuestNice counter jumped backwards", "cpu", i, "old_value", c.cpuStats[i].GuestNice, "new_value", n.GuestNice)
304 }
305 }
306}