aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author James Hartig <fastest963@gmail.com> 2018-08-14 08:28:26 -0400
committer Ben Kochie <superq@gmail.com> 2018-08-14 14:28:26 +0200
commit 60c827231afe2a7621ee632e02c3dd94e0faa19f (patch)
tree 585fc6c40af532132527ff6a7265a5121d0b6c6a
parent fe5a1178313b2f56e53d4194ae306373d92a208a (diff)
download prometheus_node_collector-60c827231afe2a7621ee632e02c3dd94e0faa19f.tar.bz2
prometheus_node_collector-60c827231afe2a7621ee632e02c3dd94e0faa19f.tar.xz
prometheus_node_collector-60c827231afe2a7621ee632e02c3dd94e0faa19f.zip
NRestarts or NRefused aren't available on older systemd versions (#1039)
* If NRestarts or NRefused are not available, don't ignore the unit itself
* Don't report systemd metrics (NRestarts/NRefused) that are not available

Signed-off-by: James Hartig <james@getadmiral.com>
-rw-r--r-- CHANGELOG.md 2
-rw-r--r-- collector/systemd_linux.go 42
2 files changed, 28 insertions, 16 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7758cc9..1401f84 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ The wifi collector is disabled by default due to suspected caching issues and go
9* https://github.com/prometheus/node_exporter/issues/1008 9* https://github.com/prometheus/node_exporter/issues/1008
10 10
11* [CHANGE] Filter out non-installed units when collecting all systemd units #1011 11* [CHANGE] Filter out non-installed units when collecting all systemd units #1011
12* [CHANGE] `service_restart_total` and `socket_refused_connections_total` will not be reported if you're running an older version of systemd
12* [FEATURE] Collect NRefused property for systemd socket units (available as of systemd v239) 13* [FEATURE] Collect NRefused property for systemd socket units (available as of systemd v239)
13* [FEATURE] Collect NRestarts property for systemd service units 14* [FEATURE] Collect NRestarts property for systemd service units
14* [FEATURE] Add socket unit stats to systemd collector #968 15* [FEATURE] Add socket unit stats to systemd collector #968
@@ -16,6 +17,7 @@ The wifi collector is disabled by default due to suspected caching issues and go
16* [ENHANCEMENT] 17* [ENHANCEMENT]
17 18
18* [BUGFIX] Fix goroutine leak in supervisord collector 19* [BUGFIX] Fix goroutine leak in supervisord collector
20* [BUGFIX] Systemd units will not be ignored if you're running older versions of systemd #1039
19* [BUGFIX] Handle vanishing PIDs #1043 21* [BUGFIX] Handle vanishing PIDs #1043
20 22
21## 0.16.0 / 2018-05-15 23## 0.16.0 / 2018-05-15
diff --git a/collector/systemd_linux.go b/collector/systemd_linux.go
index 57aff5a..7140b7b 100644
--- a/collector/systemd_linux.go
+++ b/collector/systemd_linux.go
@@ -140,10 +140,10 @@ func (c *systemdCollector) collectUnitStatusMetrics(ch chan<- prometheus.Metric,
140 c.unitDesc, prometheus.GaugeValue, isActive, 140 c.unitDesc, prometheus.GaugeValue, isActive,
141 unit.Name, stateName) 141 unit.Name, stateName)
142 } 142 }
143 if strings.HasSuffix(unit.Name, ".service") { 143 if strings.HasSuffix(unit.Name, ".service") && unit.nRestarts != nil {
144 ch <- prometheus.MustNewConstMetric( 144 ch <- prometheus.MustNewConstMetric(
145 c.nRestartsDesc, prometheus.CounterValue, 145 c.nRestartsDesc, prometheus.CounterValue,
146 float64(unit.nRestarts), unit.Name) 146 float64(*unit.nRestarts), unit.Name)
147 } 147 }
148 } 148 }
149} 149}
@@ -160,9 +160,11 @@ func (c *systemdCollector) collectSockets(ch chan<- prometheus.Metric, units []u
160 ch <- prometheus.MustNewConstMetric( 160 ch <- prometheus.MustNewConstMetric(
161 c.socketCurrentConnectionsDesc, prometheus.GaugeValue, 161 c.socketCurrentConnectionsDesc, prometheus.GaugeValue,
162 float64(unit.currentConnections), unit.Name) 162 float64(unit.currentConnections), unit.Name)
163 ch <- prometheus.MustNewConstMetric( 163 if unit.refusedConnections != nil {
164 c.socketRefusedConnectionsDesc, prometheus.GaugeValue, 164 ch <- prometheus.MustNewConstMetric(
165 float64(unit.refusedConnections), unit.Name) 165 c.socketRefusedConnectionsDesc, prometheus.GaugeValue,
166 float64(*unit.refusedConnections), unit.Name)
167 }
166 } 168 }
167} 169}
168 170
@@ -212,10 +214,10 @@ type unit struct {
212 dbus.UnitStatus 214 dbus.UnitStatus
213 lastTriggerUsec uint64 215 lastTriggerUsec uint64
214 startTimeUsec uint64 216 startTimeUsec uint64
215 nRestarts uint32 217 nRestarts *uint32
216 acceptedConnections uint32 218 acceptedConnections uint32
217 currentConnections uint32 219 currentConnections uint32
218 refusedConnections uint32 220 refusedConnections *uint32
219} 221}
220 222
221func (c *systemdCollector) getAllUnits() ([]unit, error) { 223func (c *systemdCollector) getAllUnits() ([]unit, error) {
@@ -241,40 +243,47 @@ func (c *systemdCollector) getAllUnits() ([]unit, error) {
241 if strings.HasSuffix(unit.Name, ".timer") { 243 if strings.HasSuffix(unit.Name, ".timer") {
242 lastTriggerValue, err := conn.GetUnitTypeProperty(unit.Name, "Timer", "LastTriggerUSec") 244 lastTriggerValue, err := conn.GetUnitTypeProperty(unit.Name, "Timer", "LastTriggerUSec")
243 if err != nil { 245 if err != nil {
244 return nil, fmt.Errorf("couldn't get unit '%s' LastTriggerUSec: %s", unit.Name, err) 246 log.Debugf("couldn't get unit '%s' LastTriggerUSec: %s\n", unit.Name, err)
247 continue
245 } 248 }
246 249
247 unit.lastTriggerUsec = lastTriggerValue.Value.Value().(uint64) 250 unit.lastTriggerUsec = lastTriggerValue.Value.Value().(uint64)
248 } 251 }
249 if strings.HasSuffix(unit.Name, ".service") { 252 if strings.HasSuffix(unit.Name, ".service") {
250 nRestarts, err := conn.GetUnitTypeProperty(unit.Name, "Service", "NRestarts") 253 // NRestarts wasn't added until systemd 235.
254 restartsCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "NRestarts")
251 if err != nil { 255 if err != nil {
252 log.Debugf("couldn't get unit '%s' NRestarts: %s\n", unit.Name, err) 256 log.Debugf("couldn't get unit '%s' NRestarts: %s\n", unit.Name, err)
253 continue 257 } else {
258 nRestarts := restartsCount.Value.Value().(uint32)
259 unit.nRestarts = &nRestarts
254 } 260 }
255 unit.nRestarts = nRestarts.Value.Value().(uint32)
256 } 261 }
257 262
258 if strings.HasSuffix(unit.Name, ".socket") { 263 if strings.HasSuffix(unit.Name, ".socket") {
259 acceptedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NAccepted") 264 acceptedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NAccepted")
260 if err != nil { 265 if err != nil {
261 return nil, fmt.Errorf("couldn't get unit '%s' NAccepted: %s", unit.Name, err) 266 log.Debugf("couldn't get unit '%s' NAccepted: %s\n", unit.Name, err)
267 continue
262 } 268 }
263 269
264 unit.acceptedConnections = acceptedConnectionCount.Value.Value().(uint32) 270 unit.acceptedConnections = acceptedConnectionCount.Value.Value().(uint32)
265 271
266 currentConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NConnections") 272 currentConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NConnections")
267 if err != nil { 273 if err != nil {
268 return nil, fmt.Errorf("couldn't get unit '%s' NConnections: %s", unit.Name, err) 274 log.Debugf("couldn't get unit '%s' NConnections: %s\n", unit.Name, err)
275 continue
269 } 276 }
270 unit.currentConnections = currentConnectionCount.Value.Value().(uint32) 277 unit.currentConnections = currentConnectionCount.Value.Value().(uint32)
271 278
279 // NRefused wasn't added until systemd 239.
272 refusedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NRefused") 280 refusedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NRefused")
273 if err != nil { 281 if err != nil {
274 log.Debugf("couldn't get unit '%s' NRefused: %s\n", unit.Name, err) 282 log.Debugf("couldn't get unit '%s' NRefused: %s\n", unit.Name, err)
275 continue 283 } else {
284 nRefused := refusedConnectionCount.Value.Value().(uint32)
285 unit.refusedConnections = &nRefused
276 } 286 }
277 unit.refusedConnections = refusedConnectionCount.Value.Value().(uint32)
278 } 287 }
279 288
280 if unit.ActiveState != "active" { 289 if unit.ActiveState != "active" {
@@ -282,7 +291,8 @@ func (c *systemdCollector) getAllUnits() ([]unit, error) {
282 } else { 291 } else {
283 timestampValue, err := conn.GetUnitProperty(unit.Name, "ActiveEnterTimestamp") 292 timestampValue, err := conn.GetUnitProperty(unit.Name, "ActiveEnterTimestamp")
284 if err != nil { 293 if err != nil {
285 return nil, fmt.Errorf("couldn't get unit '%s' StartTimeUsec: %s", unit.Name, err) 294 log.Debugf("couldn't get unit '%s' StartTimeUsec: %s\n", unit.Name, err)
295 continue
286 } 296 }
287 297
288 unit.startTimeUsec = timestampValue.Value.Value().(uint64) 298 unit.startTimeUsec = timestampValue.Value.Value().(uint64)