diff options
author | James Hartig <fastest963@gmail.com> | 2018-08-14 08:28:26 -0400 |
---|---|---|
committer | Ben Kochie <superq@gmail.com> | 2018-08-14 14:28:26 +0200 |
commit | 60c827231afe2a7621ee632e02c3dd94e0faa19f (patch) | |
tree | 585fc6c40af532132527ff6a7265a5121d0b6c6a | |
parent | fe5a1178313b2f56e53d4194ae306373d92a208a (diff) | |
download | prometheus_node_collector-60c827231afe2a7621ee632e02c3dd94e0faa19f.tar.bz2 prometheus_node_collector-60c827231afe2a7621ee632e02c3dd94e0faa19f.tar.xz prometheus_node_collector-60c827231afe2a7621ee632e02c3dd94e0faa19f.zip |
NRestarts or NRefused aren't available on older systemd versions (#1039)
* If NRestarts or NRefused are not available, don't ignore the unit itself
* Don't report systemd metrics (NRestarts/NRefused) that are not available
Signed-off-by: James Hartig <james@getadmiral.com>
-rw-r--r-- | CHANGELOG.md | 2 | ||||
-rw-r--r-- | collector/systemd_linux.go | 42 |
2 files changed, 28 insertions, 16 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 7758cc9..1401f84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md | |||
@@ -9,6 +9,7 @@ The wifi collector is disabled by default due to suspected caching issues and go | |||
9 | * https://github.com/prometheus/node_exporter/issues/1008 | 9 | * https://github.com/prometheus/node_exporter/issues/1008 |
10 | 10 | ||
11 | * [CHANGE] Filter out non-installed units when collecting all systemd units #1011 | 11 | * [CHANGE] Filter out non-installed units when collecting all systemd units #1011 |
12 | * [CHANGE] `service_restart_total` and `socket_refused_connections_total` will not be reported on systemd versions that lack the underlying properties (NRestarts requires systemd 235 or later, NRefused requires systemd 239 or later) | ||
12 | * [FEATURE] Collect NRefused property for systemd socket units (available as of systemd v239) | 13 | * [FEATURE] Collect NRefused property for systemd socket units (available as of systemd v239) |
13 | * [FEATURE] Collect NRestarts property for systemd service units | 14 | * [FEATURE] Collect NRestarts property for systemd service units |
14 | * [FEATURE] Add socket unit stats to systemd collector #968 | 15 | * [FEATURE] Add socket unit stats to systemd collector #968 |
@@ -16,6 +17,7 @@ The wifi collector is disabled by default due to suspected caching issues and go | |||
16 | * [ENHANCEMENT] | 17 | * [ENHANCEMENT] |
17 | 18 | ||
18 | * [BUGFIX] Fix goroutine leak in supervisord collector | 19 | * [BUGFIX] Fix goroutine leak in supervisord collector |
20 | * [BUGFIX] Systemd units are no longer skipped when the NRestarts or NRefused property is unavailable on older versions of systemd #1039 | ||
19 | * [BUGFIX] Handle vanishing PIDs #1043 | 21 | * [BUGFIX] Handle vanishing PIDs #1043 |
20 | 22 | ||
21 | ## 0.16.0 / 2018-05-15 | 23 | ## 0.16.0 / 2018-05-15 |
diff --git a/collector/systemd_linux.go b/collector/systemd_linux.go index 57aff5a..7140b7b 100644 --- a/collector/systemd_linux.go +++ b/collector/systemd_linux.go | |||
@@ -140,10 +140,10 @@ func (c *systemdCollector) collectUnitStatusMetrics(ch chan<- prometheus.Metric, | |||
140 | c.unitDesc, prometheus.GaugeValue, isActive, | 140 | c.unitDesc, prometheus.GaugeValue, isActive, |
141 | unit.Name, stateName) | 141 | unit.Name, stateName) |
142 | } | 142 | } |
143 | if strings.HasSuffix(unit.Name, ".service") { | 143 | if strings.HasSuffix(unit.Name, ".service") && unit.nRestarts != nil { |
144 | ch <- prometheus.MustNewConstMetric( | 144 | ch <- prometheus.MustNewConstMetric( |
145 | c.nRestartsDesc, prometheus.CounterValue, | 145 | c.nRestartsDesc, prometheus.CounterValue, |
146 | float64(unit.nRestarts), unit.Name) | 146 | float64(*unit.nRestarts), unit.Name) |
147 | } | 147 | } |
148 | } | 148 | } |
149 | } | 149 | } |
@@ -160,9 +160,11 @@ func (c *systemdCollector) collectSockets(ch chan<- prometheus.Metric, units []u | |||
160 | ch <- prometheus.MustNewConstMetric( | 160 | ch <- prometheus.MustNewConstMetric( |
161 | c.socketCurrentConnectionsDesc, prometheus.GaugeValue, | 161 | c.socketCurrentConnectionsDesc, prometheus.GaugeValue, |
162 | float64(unit.currentConnections), unit.Name) | 162 | float64(unit.currentConnections), unit.Name) |
163 | ch <- prometheus.MustNewConstMetric( | 163 | if unit.refusedConnections != nil { |
164 | c.socketRefusedConnectionsDesc, prometheus.GaugeValue, | 164 | ch <- prometheus.MustNewConstMetric( |
165 | float64(unit.refusedConnections), unit.Name) | 165 | c.socketRefusedConnectionsDesc, prometheus.GaugeValue, |
166 | float64(*unit.refusedConnections), unit.Name) | ||
167 | } | ||
166 | } | 168 | } |
167 | } | 169 | } |
168 | 170 | ||
@@ -212,10 +214,10 @@ type unit struct { | |||
212 | dbus.UnitStatus | 214 | dbus.UnitStatus |
213 | lastTriggerUsec uint64 | 215 | lastTriggerUsec uint64 |
214 | startTimeUsec uint64 | 216 | startTimeUsec uint64 |
215 | nRestarts uint32 | 217 | nRestarts *uint32 |
216 | acceptedConnections uint32 | 218 | acceptedConnections uint32 |
217 | currentConnections uint32 | 219 | currentConnections uint32 |
218 | refusedConnections uint32 | 220 | refusedConnections *uint32 |
219 | } | 221 | } |
220 | 222 | ||
221 | func (c *systemdCollector) getAllUnits() ([]unit, error) { | 223 | func (c *systemdCollector) getAllUnits() ([]unit, error) { |
@@ -241,40 +243,47 @@ func (c *systemdCollector) getAllUnits() ([]unit, error) { | |||
241 | if strings.HasSuffix(unit.Name, ".timer") { | 243 | if strings.HasSuffix(unit.Name, ".timer") { |
242 | lastTriggerValue, err := conn.GetUnitTypeProperty(unit.Name, "Timer", "LastTriggerUSec") | 244 | lastTriggerValue, err := conn.GetUnitTypeProperty(unit.Name, "Timer", "LastTriggerUSec") |
243 | if err != nil { | 245 | if err != nil { |
244 | return nil, fmt.Errorf("couldn't get unit '%s' LastTriggerUSec: %s", unit.Name, err) | 246 | log.Debugf("couldn't get unit '%s' LastTriggerUSec: %s\n", unit.Name, err) |
247 | continue | ||
245 | } | 248 | } |
246 | 249 | ||
247 | unit.lastTriggerUsec = lastTriggerValue.Value.Value().(uint64) | 250 | unit.lastTriggerUsec = lastTriggerValue.Value.Value().(uint64) |
248 | } | 251 | } |
249 | if strings.HasSuffix(unit.Name, ".service") { | 252 | if strings.HasSuffix(unit.Name, ".service") { |
250 | nRestarts, err := conn.GetUnitTypeProperty(unit.Name, "Service", "NRestarts") | 253 | // NRestarts wasn't added until systemd 235. |
254 | restartsCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "NRestarts") | ||
251 | if err != nil { | 255 | if err != nil { |
252 | log.Debugf("couldn't get unit '%s' NRestarts: %s\n", unit.Name, err) | 256 | log.Debugf("couldn't get unit '%s' NRestarts: %s\n", unit.Name, err) |
253 | continue | 257 | } else { |
258 | nRestarts := restartsCount.Value.Value().(uint32) | ||
259 | unit.nRestarts = &nRestarts | ||
254 | } | 260 | } |
255 | unit.nRestarts = nRestarts.Value.Value().(uint32) | ||
256 | } | 261 | } |
257 | 262 | ||
258 | if strings.HasSuffix(unit.Name, ".socket") { | 263 | if strings.HasSuffix(unit.Name, ".socket") { |
259 | acceptedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NAccepted") | 264 | acceptedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NAccepted") |
260 | if err != nil { | 265 | if err != nil { |
261 | return nil, fmt.Errorf("couldn't get unit '%s' NAccepted: %s", unit.Name, err) | 266 | log.Debugf("couldn't get unit '%s' NAccepted: %s\n", unit.Name, err) |
267 | continue | ||
262 | } | 268 | } |
263 | 269 | ||
264 | unit.acceptedConnections = acceptedConnectionCount.Value.Value().(uint32) | 270 | unit.acceptedConnections = acceptedConnectionCount.Value.Value().(uint32) |
265 | 271 | ||
266 | currentConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NConnections") | 272 | currentConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NConnections") |
267 | if err != nil { | 273 | if err != nil { |
268 | return nil, fmt.Errorf("couldn't get unit '%s' NConnections: %s", unit.Name, err) | 274 | log.Debugf("couldn't get unit '%s' NConnections: %s\n", unit.Name, err) |
275 | continue | ||
269 | } | 276 | } |
270 | unit.currentConnections = currentConnectionCount.Value.Value().(uint32) | 277 | unit.currentConnections = currentConnectionCount.Value.Value().(uint32) |
271 | 278 | ||
279 | // NRefused wasn't added until systemd 239. | ||
272 | refusedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NRefused") | 280 | refusedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NRefused") |
273 | if err != nil { | 281 | if err != nil { |
274 | log.Debugf("couldn't get unit '%s' NRefused: %s\n", unit.Name, err) | 282 | log.Debugf("couldn't get unit '%s' NRefused: %s\n", unit.Name, err) |
275 | continue | 283 | } else { |
284 | nRefused := refusedConnectionCount.Value.Value().(uint32) | ||
285 | unit.refusedConnections = &nRefused | ||
276 | } | 286 | } |
277 | unit.refusedConnections = refusedConnectionCount.Value.Value().(uint32) | ||
278 | } | 287 | } |
279 | 288 | ||
280 | if unit.ActiveState != "active" { | 289 | if unit.ActiveState != "active" { |
@@ -282,7 +291,8 @@ func (c *systemdCollector) getAllUnits() ([]unit, error) { | |||
282 | } else { | 291 | } else { |
283 | timestampValue, err := conn.GetUnitProperty(unit.Name, "ActiveEnterTimestamp") | 292 | timestampValue, err := conn.GetUnitProperty(unit.Name, "ActiveEnterTimestamp") |
284 | if err != nil { | 293 | if err != nil { |
285 | return nil, fmt.Errorf("couldn't get unit '%s' StartTimeUsec: %s", unit.Name, err) | 294 | log.Debugf("couldn't get unit '%s' StartTimeUsec: %s\n", unit.Name, err) |
295 | continue | ||
286 | } | 296 | } |
287 | 297 | ||
288 | unit.startTimeUsec = timestampValue.Value.Value().(uint64) | 298 | unit.startTimeUsec = timestampValue.Value.Value().(uint64) |