diff options
author | Paul Gier <pgier@redhat.com> | 2019-02-11 16:27:21 -0600 |
---|---|---|
committer | Ben Kochie <superq@gmail.com> | 2019-02-11 23:27:21 +0100 |
commit | cb9e23c536b1daf2ac50ea52e515509951e0cd16 (patch) | |
tree | 06042e7603c118db3a0a64b1dd24747a067a9579 | |
parent | 1ba436e1949c7ab8af83642dc973cb30b118966a (diff) | |
download | prometheus_node_collector-cb9e23c536b1daf2ac50ea52e515509951e0cd16.tar.bz2 prometheus_node_collector-cb9e23c536b1daf2ac50ea52e515509951e0cd16.tar.xz prometheus_node_collector-cb9e23c536b1daf2ac50ea52e515509951e0cd16.zip |
Systemd refactor (#1254)
This reduces the systemd metric collection time by using a wait group
and goroutines to allow the systemd metric calls to happen concurrently.
Also disables the start time, restarts, tasks_max, and tasks_current metrics by default
because these can be time-consuming to gather.
Signed-off-by: Paul Gier <pgier@redhat.com>
-rw-r--r-- | CHANGELOG.md | 3 | ||||
-rw-r--r-- | collector/systemd_linux.go | 352 | ||||
-rw-r--r-- | collector/systemd_linux_test.go | 21 |
3 files changed, 181 insertions, 195 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d6abb2..9bd7ee4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md | |||
@@ -7,6 +7,8 @@ | |||
7 | * The cpufreq metrics now separate the `cpufreq` and `scaling` data based on what the driver provides. #1248 | 7 | * The cpufreq metrics now separate the `cpufreq` and `scaling` data based on what the driver provides. #1248 |
8 | * The labels for the network_up metric have changed, see issue #1236 | 8 | * The labels for the network_up metric have changed, see issue #1236 |
9 | * Bonding collector now uses `mii_status` instead of `operstatus` #1124 | 9 | * Bonding collector now uses `mii_status` instead of `operstatus` #1124 |
10 | * Several systemd metrics have been turned off by default to improve performance #1254 | ||
11 | These include unit_tasks_current, unit_tasks_max, service_restart_total, and unit_start_time_seconds | ||
10 | 12 | ||
11 | ### Changes | 13 | ### Changes |
12 | 14 | ||
@@ -16,6 +18,7 @@ | |||
16 | * [CHANGE] Add TCPSynRetrans to netstat default filter #1143 | 18 | * [CHANGE] Add TCPSynRetrans to netstat default filter #1143 |
17 | * [CHANGE] Add a limit to the number of in-flight requests #1166 | 19 | * [CHANGE] Add a limit to the number of in-flight requests #1166 |
18 | * [CHANGE] Add separate cpufreq and scaling metrics #1248 | 20 | * [CHANGE] Add separate cpufreq and scaling metrics #1248 |
21 | * [CHANGE] Several systemd metrics have been turned off by default to improve performance #1254 | ||
19 | * [ENHANCEMENT] Add Infiniband counters #1120 | 22 | * [ENHANCEMENT] Add Infiniband counters #1120 |
20 | * [ENHANCEMENT] Move network_up labels into new metric network_info #1236 | 23 | * [ENHANCEMENT] Move network_up labels into new metric network_info #1236 |
21 | * [FEATURE] Add a flag to disable exporter metrics #1148 | 24 | * [FEATURE] Add a flag to disable exporter metrics #1148 |
diff --git a/collector/systemd_linux.go b/collector/systemd_linux.go index b34c938..faccded 100644 --- a/collector/systemd_linux.go +++ b/collector/systemd_linux.go | |||
@@ -20,6 +20,8 @@ import ( | |||
20 | "math" | 20 | "math" |
21 | "regexp" | 21 | "regexp" |
22 | "strings" | 22 | "strings" |
23 | "sync" | ||
24 | "time" | ||
23 | 25 | ||
24 | "github.com/coreos/go-systemd/dbus" | 26 | "github.com/coreos/go-systemd/dbus" |
25 | "github.com/prometheus/client_golang/prometheus" | 27 | "github.com/prometheus/client_golang/prometheus" |
@@ -28,9 +30,12 @@ import ( | |||
28 | ) | 30 | ) |
29 | 31 | ||
30 | var ( | 32 | var ( |
31 | unitWhitelist = kingpin.Flag("collector.systemd.unit-whitelist", "Regexp of systemd units to whitelist. Units must both match whitelist and not match blacklist to be included.").Default(".+").String() | 33 | unitWhitelist = kingpin.Flag("collector.systemd.unit-whitelist", "Regexp of systemd units to whitelist. Units must both match whitelist and not match blacklist to be included.").Default(".+").String() |
32 | unitBlacklist = kingpin.Flag("collector.systemd.unit-blacklist", "Regexp of systemd units to blacklist. Units must both match whitelist and not match blacklist to be included.").Default(".+\\.scope").String() | 34 | unitBlacklist = kingpin.Flag("collector.systemd.unit-blacklist", "Regexp of systemd units to blacklist. Units must both match whitelist and not match blacklist to be included.").Default(".+\\.scope").String() |
33 | systemdPrivate = kingpin.Flag("collector.systemd.private", "Establish a private, direct connection to systemd without dbus.").Bool() | 35 | systemdPrivate = kingpin.Flag("collector.systemd.private", "Establish a private, direct connection to systemd without dbus.").Bool() |
36 | enableTaskMetrics = kingpin.Flag("collector.systemd.enable-task-metrics", "Enables service unit tasks metrics unit_tasks_current and unit_tasks_max").Bool() | ||
37 | enableRestartsMetrics = kingpin.Flag("collector.systemd.enable-restarts-metrics", "Enables service unit metric service_restart_total").Bool() | ||
38 | enableStartTimeMetrics = kingpin.Flag("collector.systemd.enable-start-time-metrics", "Enables service unit metric unit_start_time_seconds").Bool() | ||
34 | ) | 39 | ) |
35 | 40 | ||
36 | type systemdCollector struct { | 41 | type systemdCollector struct { |
@@ -118,34 +123,102 @@ func NewSystemdCollector() (Collector, error) { | |||
118 | }, nil | 123 | }, nil |
119 | } | 124 | } |
120 | 125 | ||
126 | // Update gathers metrics from systemd. Dbus collection is done in parallel | ||
127 | // to reduce wait time for responses. | ||
121 | func (c *systemdCollector) Update(ch chan<- prometheus.Metric) error { | 128 | func (c *systemdCollector) Update(ch chan<- prometheus.Metric) error { |
122 | allUnits, err := c.getAllUnits() | 129 | begin := time.Now() |
130 | conn, err := c.newDbus() | ||
131 | if err != nil { | ||
132 | return fmt.Errorf("couldn't get dbus connection: %s", err) | ||
133 | } | ||
134 | defer conn.Close() | ||
135 | |||
136 | allUnits, err := c.getAllUnits(conn) | ||
123 | if err != nil { | 137 | if err != nil { |
124 | return fmt.Errorf("couldn't get units: %s", err) | 138 | return fmt.Errorf("couldn't get units: %s", err) |
125 | } | 139 | } |
140 | log.Debugf("systemd getAllUnits took %f", time.Since(begin).Seconds()) | ||
126 | 141 | ||
142 | begin = time.Now() | ||
127 | summary := summarizeUnits(allUnits) | 143 | summary := summarizeUnits(allUnits) |
128 | c.collectSummaryMetrics(ch, summary) | 144 | c.collectSummaryMetrics(ch, summary) |
145 | log.Debugf("systemd collectSummaryMetrics took %f", time.Since(begin).Seconds()) | ||
129 | 146 | ||
147 | begin = time.Now() | ||
130 | units := filterUnits(allUnits, c.unitWhitelistPattern, c.unitBlacklistPattern) | 148 | units := filterUnits(allUnits, c.unitWhitelistPattern, c.unitBlacklistPattern) |
131 | c.collectUnitStatusMetrics(ch, units) | 149 | log.Debugf("systemd filterUnits took %f", time.Since(begin).Seconds()) |
132 | c.collectUnitStartTimeMetrics(ch, units) | 150 | |
133 | c.collectUnitTasksCurrentMetrics(ch, units) | 151 | var wg sync.WaitGroup |
134 | c.collectUnitTasksMaxMetrics(ch, units) | 152 | defer wg.Wait() |
135 | c.collectTimers(ch, units) | 153 | |
136 | c.collectSockets(ch, units) | 154 | wg.Add(1) |
137 | 155 | go func() { | |
138 | systemState, err := c.getSystemState() | 156 | defer wg.Done() |
139 | if err != nil { | 157 | begin = time.Now() |
140 | return fmt.Errorf("couldn't get system state: %s", err) | 158 | c.collectUnitStatusMetrics(conn, ch, units) |
159 | log.Debugf("systemd collectUnitStatusMetrics took %f", time.Since(begin).Seconds()) | ||
160 | }() | ||
161 | |||
162 | if *enableStartTimeMetrics { | ||
163 | wg.Add(1) | ||
164 | go func() { | ||
165 | defer wg.Done() | ||
166 | begin = time.Now() | ||
167 | c.collectUnitStartTimeMetrics(conn, ch, units) | ||
168 | log.Debugf("systemd collectUnitStartTimeMetrics took %f", time.Since(begin).Seconds()) | ||
169 | }() | ||
141 | } | 170 | } |
142 | c.collectSystemState(ch, systemState) | ||
143 | 171 | ||
144 | return nil | 172 | if *enableTaskMetrics { |
173 | wg.Add(1) | ||
174 | go func() { | ||
175 | defer wg.Done() | ||
176 | begin = time.Now() | ||
177 | c.collectUnitTasksMetrics(conn, ch, units) | ||
178 | log.Debugf("systemd collectUnitTasksMetrics took %f", time.Since(begin).Seconds()) | ||
179 | }() | ||
180 | } | ||
181 | |||
182 | wg.Add(1) | ||
183 | go func() { | ||
184 | defer wg.Done() | ||
185 | begin = time.Now() | ||
186 | c.collectTimers(conn, ch, units) | ||
187 | log.Debugf("systemd collectTimers took %f", time.Since(begin).Seconds()) | ||
188 | }() | ||
189 | |||
190 | wg.Add(1) | ||
191 | go func() { | ||
192 | defer wg.Done() | ||
193 | begin = time.Now() | ||
194 | c.collectSockets(conn, ch, units) | ||
195 | log.Debugf("systemd collectSockets took %f", time.Since(begin).Seconds()) | ||
196 | }() | ||
197 | |||
198 | begin = time.Now() | ||
199 | err = c.collectSystemState(conn, ch) | ||
200 | log.Debugf("systemd collectSystemState took %f", time.Since(begin).Seconds()) | ||
201 | return err | ||
145 | } | 202 | } |
146 | 203 | ||
147 | func (c *systemdCollector) collectUnitStatusMetrics(ch chan<- prometheus.Metric, units []unit) { | 204 | func (c *systemdCollector) collectUnitStatusMetrics(conn *dbus.Conn, ch chan<- prometheus.Metric, units []unit) { |
148 | for _, unit := range units { | 205 | for _, unit := range units { |
206 | serviceType := "" | ||
207 | if strings.HasSuffix(unit.Name, ".service") { | ||
208 | serviceTypeProperty, err := conn.GetUnitTypeProperty(unit.Name, "Service", "Type") | ||
209 | if err != nil { | ||
210 | log.Debugf("couldn't get unit '%s' Type: %s", unit.Name, err) | ||
211 | } else { | ||
212 | serviceType = serviceTypeProperty.Value.Value().(string) | ||
213 | } | ||
214 | } else if strings.HasSuffix(unit.Name, ".mount") { | ||
215 | serviceTypeProperty, err := conn.GetUnitTypeProperty(unit.Name, "Mount", "Type") | ||
216 | if err != nil { | ||
217 | log.Debugf("couldn't get unit '%s' Type: %s", unit.Name, err) | ||
218 | } else { | ||
219 | serviceType = serviceTypeProperty.Value.Value().(string) | ||
220 | } | ||
221 | } | ||
149 | for _, stateName := range unitStatesName { | 222 | for _, stateName := range unitStatesName { |
150 | isActive := 0.0 | 223 | isActive := 0.0 |
151 | if stateName == unit.ActiveState { | 224 | if stateName == unit.ActiveState { |
@@ -153,73 +226,126 @@ func (c *systemdCollector) collectUnitStatusMetrics(ch chan<- prometheus.Metric, | |||
153 | } | 226 | } |
154 | ch <- prometheus.MustNewConstMetric( | 227 | ch <- prometheus.MustNewConstMetric( |
155 | c.unitDesc, prometheus.GaugeValue, isActive, | 228 | c.unitDesc, prometheus.GaugeValue, isActive, |
156 | unit.Name, stateName, unit.serviceType) | 229 | unit.Name, stateName, serviceType) |
157 | } | 230 | } |
158 | if strings.HasSuffix(unit.Name, ".service") && unit.nRestarts != nil { | 231 | if *enableRestartsMetrics && strings.HasSuffix(unit.Name, ".service") { |
159 | ch <- prometheus.MustNewConstMetric( | 232 | // NRestarts wasn't added until systemd 235. |
160 | c.nRestartsDesc, prometheus.CounterValue, | 233 | restartsCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "NRestarts") |
161 | float64(*unit.nRestarts), unit.Name) | 234 | if err != nil { |
235 | log.Debugf("couldn't get unit '%s' NRestarts: %s", unit.Name, err) | ||
236 | } else { | ||
237 | ch <- prometheus.MustNewConstMetric( | ||
238 | c.nRestartsDesc, prometheus.CounterValue, | ||
239 | float64(restartsCount.Value.Value().(uint32)), unit.Name) | ||
240 | } | ||
162 | } | 241 | } |
163 | } | 242 | } |
164 | } | 243 | } |
165 | 244 | ||
166 | func (c *systemdCollector) collectSockets(ch chan<- prometheus.Metric, units []unit) { | 245 | func (c *systemdCollector) collectSockets(conn *dbus.Conn, ch chan<- prometheus.Metric, units []unit) { |
167 | for _, unit := range units { | 246 | for _, unit := range units { |
168 | if !strings.HasSuffix(unit.Name, ".socket") { | 247 | if !strings.HasSuffix(unit.Name, ".socket") { |
169 | continue | 248 | continue |
170 | } | 249 | } |
171 | 250 | ||
251 | acceptedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NAccepted") | ||
252 | if err != nil { | ||
253 | log.Debugf("couldn't get unit '%s' NAccepted: %s", unit.Name, err) | ||
254 | continue | ||
255 | } | ||
172 | ch <- prometheus.MustNewConstMetric( | 256 | ch <- prometheus.MustNewConstMetric( |
173 | c.socketAcceptedConnectionsDesc, prometheus.CounterValue, | 257 | c.socketAcceptedConnectionsDesc, prometheus.CounterValue, |
174 | float64(unit.acceptedConnections), unit.Name) | 258 | float64(acceptedConnectionCount.Value.Value().(uint32)), unit.Name) |
259 | |||
260 | currentConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NConnections") | ||
261 | if err != nil { | ||
262 | log.Debugf("couldn't get unit '%s' NConnections: %s", unit.Name, err) | ||
263 | continue | ||
264 | } | ||
175 | ch <- prometheus.MustNewConstMetric( | 265 | ch <- prometheus.MustNewConstMetric( |
176 | c.socketCurrentConnectionsDesc, prometheus.GaugeValue, | 266 | c.socketCurrentConnectionsDesc, prometheus.GaugeValue, |
177 | float64(unit.currentConnections), unit.Name) | 267 | float64(currentConnectionCount.Value.Value().(uint32)), unit.Name) |
178 | if unit.refusedConnections != nil { | 268 | |
269 | // NRefused wasn't added until systemd 239. | ||
270 | refusedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NRefused") | ||
271 | if err != nil { | ||
272 | //log.Debugf("couldn't get unit '%s' NRefused: %s", unit.Name, err) | ||
273 | } else { | ||
179 | ch <- prometheus.MustNewConstMetric( | 274 | ch <- prometheus.MustNewConstMetric( |
180 | c.socketRefusedConnectionsDesc, prometheus.GaugeValue, | 275 | c.socketRefusedConnectionsDesc, prometheus.GaugeValue, |
181 | float64(*unit.refusedConnections), unit.Name) | 276 | float64(refusedConnectionCount.Value.Value().(uint32)), unit.Name) |
182 | } | 277 | } |
183 | } | 278 | } |
184 | } | 279 | } |
185 | 280 | ||
186 | func (c *systemdCollector) collectUnitStartTimeMetrics(ch chan<- prometheus.Metric, units []unit) { | 281 | func (c *systemdCollector) collectUnitStartTimeMetrics(conn *dbus.Conn, ch chan<- prometheus.Metric, units []unit) { |
282 | var startTimeUsec uint64 | ||
283 | |||
187 | for _, unit := range units { | 284 | for _, unit := range units { |
285 | if unit.ActiveState != "active" { | ||
286 | startTimeUsec = 0 | ||
287 | } else { | ||
288 | timestampValue, err := conn.GetUnitProperty(unit.Name, "ActiveEnterTimestamp") | ||
289 | if err != nil { | ||
290 | log.Debugf("couldn't get unit '%s' StartTimeUsec: %s", unit.Name, err) | ||
291 | continue | ||
292 | } | ||
293 | startTimeUsec = timestampValue.Value.Value().(uint64) | ||
294 | } | ||
295 | |||
188 | ch <- prometheus.MustNewConstMetric( | 296 | ch <- prometheus.MustNewConstMetric( |
189 | c.unitStartTimeDesc, prometheus.GaugeValue, | 297 | c.unitStartTimeDesc, prometheus.GaugeValue, |
190 | float64(unit.startTimeUsec)/1e6, unit.Name) | 298 | float64(startTimeUsec)/1e6, unit.Name) |
191 | } | 299 | } |
192 | } | 300 | } |
193 | 301 | ||
194 | func (c *systemdCollector) collectUnitTasksCurrentMetrics(ch chan<- prometheus.Metric, units []unit) { | 302 | func (c *systemdCollector) collectUnitTasksMetrics(conn *dbus.Conn, ch chan<- prometheus.Metric, units []unit) { |
303 | var val uint64 | ||
195 | for _, unit := range units { | 304 | for _, unit := range units { |
196 | if unit.tasksCurrent != nil { | 305 | if strings.HasSuffix(unit.Name, ".service") { |
197 | ch <- prometheus.MustNewConstMetric( | 306 | tasksCurrentCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "TasksCurrent") |
198 | c.unitTasksCurrentDesc, prometheus.GaugeValue, | 307 | if err != nil { |
199 | float64(*unit.tasksCurrent), unit.Name) | 308 | log.Debugf("couldn't get unit '%s' TasksCurrent: %s", unit.Name, err) |
309 | } else { | ||
310 | val = tasksCurrentCount.Value.Value().(uint64) | ||
311 | // Don't set if tasksCurrent if dbus reports MaxUint64. | ||
312 | if val != math.MaxUint64 { | ||
313 | ch <- prometheus.MustNewConstMetric( | ||
314 | c.unitTasksCurrentDesc, prometheus.GaugeValue, | ||
315 | float64(val), unit.Name) | ||
316 | } | ||
317 | } | ||
318 | tasksMaxCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "TasksMax") | ||
319 | if err != nil { | ||
320 | log.Debugf("couldn't get unit '%s' TasksMax: %s", unit.Name, err) | ||
321 | } else { | ||
322 | val = tasksMaxCount.Value.Value().(uint64) | ||
323 | // Don't set if tasksMax if dbus reports MaxUint64. | ||
324 | if val != math.MaxUint64 { | ||
325 | ch <- prometheus.MustNewConstMetric( | ||
326 | c.unitTasksMaxDesc, prometheus.GaugeValue, | ||
327 | float64(val), unit.Name) | ||
328 | } | ||
329 | } | ||
200 | } | 330 | } |
201 | } | 331 | } |
202 | } | 332 | } |
203 | 333 | ||
204 | func (c *systemdCollector) collectUnitTasksMaxMetrics(ch chan<- prometheus.Metric, units []unit) { | 334 | func (c *systemdCollector) collectTimers(conn *dbus.Conn, ch chan<- prometheus.Metric, units []unit) { |
205 | for _, unit := range units { | 335 | for _, unit := range units { |
206 | if unit.tasksMax != nil { | 336 | if !strings.HasSuffix(unit.Name, ".timer") { |
207 | ch <- prometheus.MustNewConstMetric( | 337 | continue |
208 | c.unitTasksMaxDesc, prometheus.GaugeValue, | ||
209 | float64(*unit.tasksMax), unit.Name) | ||
210 | } | 338 | } |
211 | } | ||
212 | } | ||
213 | 339 | ||
214 | func (c *systemdCollector) collectTimers(ch chan<- prometheus.Metric, units []unit) { | 340 | lastTriggerValue, err := conn.GetUnitTypeProperty(unit.Name, "Timer", "LastTriggerUSec") |
215 | for _, unit := range units { | 341 | if err != nil { |
216 | if !strings.HasSuffix(unit.Name, ".timer") { | 342 | log.Debugf("couldn't get unit '%s' LastTriggerUSec: %s", unit.Name, err) |
217 | continue | 343 | continue |
218 | } | 344 | } |
219 | 345 | ||
220 | ch <- prometheus.MustNewConstMetric( | 346 | ch <- prometheus.MustNewConstMetric( |
221 | c.timerLastTriggerDesc, prometheus.GaugeValue, | 347 | c.timerLastTriggerDesc, prometheus.GaugeValue, |
222 | float64(unit.lastTriggerUsec)/1e6, unit.Name) | 348 | float64(lastTriggerValue.Value.Value().(uint64))/1e6, unit.Name) |
223 | } | 349 | } |
224 | } | 350 | } |
225 | 351 | ||
@@ -230,12 +356,17 @@ func (c *systemdCollector) collectSummaryMetrics(ch chan<- prometheus.Metric, su | |||
230 | } | 356 | } |
231 | } | 357 | } |
232 | 358 | ||
233 | func (c *systemdCollector) collectSystemState(ch chan<- prometheus.Metric, systemState string) { | 359 | func (c *systemdCollector) collectSystemState(conn *dbus.Conn, ch chan<- prometheus.Metric) error { |
360 | systemState, err := conn.GetManagerProperty("SystemState") | ||
361 | if err != nil { | ||
362 | return fmt.Errorf("couldn't get system state: %s", err) | ||
363 | } | ||
234 | isSystemRunning := 0.0 | 364 | isSystemRunning := 0.0 |
235 | if systemState == `"running"` { | 365 | if systemState == `"running"` { |
236 | isSystemRunning = 1.0 | 366 | isSystemRunning = 1.0 |
237 | } | 367 | } |
238 | ch <- prometheus.MustNewConstMetric(c.systemRunningDesc, prometheus.GaugeValue, isSystemRunning) | 368 | ch <- prometheus.MustNewConstMetric(c.systemRunningDesc, prometheus.GaugeValue, isSystemRunning) |
369 | return nil | ||
239 | } | 370 | } |
240 | 371 | ||
241 | func (c *systemdCollector) newDbus() (*dbus.Conn, error) { | 372 | func (c *systemdCollector) newDbus() (*dbus.Conn, error) { |
@@ -247,37 +378,10 @@ func (c *systemdCollector) newDbus() (*dbus.Conn, error) { | |||
247 | 378 | ||
248 | type unit struct { | 379 | type unit struct { |
249 | dbus.UnitStatus | 380 | dbus.UnitStatus |
250 | lastTriggerUsec uint64 | ||
251 | startTimeUsec uint64 | ||
252 | tasksCurrent *uint64 | ||
253 | tasksMax *uint64 | ||
254 | nRestarts *uint32 | ||
255 | serviceType string | ||
256 | acceptedConnections uint32 | ||
257 | currentConnections uint32 | ||
258 | refusedConnections *uint32 | ||
259 | } | ||
260 | |||
261 | // unitType gets the suffix after the last "." in the | ||
262 | // unit name and capitalizes the first letter | ||
263 | func (u *unit) unitType() string { | ||
264 | suffixIndex := strings.LastIndex(u.Name, ".") + 1 | ||
265 | if suffixIndex < 1 || suffixIndex > len(u.Name) { | ||
266 | return "" | ||
267 | } | ||
268 | return strings.Title(u.Name[suffixIndex:]) | ||
269 | } | 381 | } |
270 | 382 | ||
271 | func (c *systemdCollector) getAllUnits() ([]unit, error) { | 383 | func (c *systemdCollector) getAllUnits(conn *dbus.Conn) ([]unit, error) { |
272 | conn, err := c.newDbus() | ||
273 | if err != nil { | ||
274 | return nil, fmt.Errorf("couldn't get dbus connection: %s", err) | ||
275 | } | ||
276 | defer conn.Close() | ||
277 | |||
278 | // Filter out any units that are not installed and are pulled in only as dependencies. | ||
279 | allUnits, err := conn.ListUnits() | 384 | allUnits, err := conn.ListUnits() |
280 | |||
281 | if err != nil { | 385 | if err != nil { |
282 | return nil, err | 386 | return nil, err |
283 | } | 387 | } |
@@ -287,96 +391,6 @@ func (c *systemdCollector) getAllUnits() ([]unit, error) { | |||
287 | unit := unit{ | 391 | unit := unit{ |
288 | UnitStatus: status, | 392 | UnitStatus: status, |
289 | } | 393 | } |
290 | unitType := unit.unitType() | ||
291 | if unitType == "Service" || unitType == "Mount" { | ||
292 | serviceType, err := conn.GetUnitTypeProperty(unit.Name, unitType, "Type") | ||
293 | if err != nil { | ||
294 | log.Debugf("couldn't get type for unit '%s': %s", unit.Name, err) | ||
295 | } else { | ||
296 | unit.serviceType = serviceType.Value.Value().(string) | ||
297 | } | ||
298 | } | ||
299 | if strings.HasSuffix(unit.Name, ".timer") { | ||
300 | lastTriggerValue, err := conn.GetUnitTypeProperty(unit.Name, "Timer", "LastTriggerUSec") | ||
301 | if err != nil { | ||
302 | log.Debugf("couldn't get unit '%s' LastTriggerUSec: %s", unit.Name, err) | ||
303 | continue | ||
304 | } | ||
305 | |||
306 | unit.lastTriggerUsec = lastTriggerValue.Value.Value().(uint64) | ||
307 | } | ||
308 | if strings.HasSuffix(unit.Name, ".service") { | ||
309 | // NRestarts wasn't added until systemd 235. | ||
310 | restartsCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "NRestarts") | ||
311 | if err != nil { | ||
312 | log.Debugf("couldn't get unit '%s' NRestarts: %s", unit.Name, err) | ||
313 | } else { | ||
314 | nRestarts := restartsCount.Value.Value().(uint32) | ||
315 | unit.nRestarts = &nRestarts | ||
316 | } | ||
317 | |||
318 | tasksCurrentCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "TasksCurrent") | ||
319 | if err != nil { | ||
320 | log.Debugf("couldn't get unit '%s' TasksCurrent: %s", unit.Name, err) | ||
321 | } else { | ||
322 | val := tasksCurrentCount.Value.Value().(uint64) | ||
323 | // Don't set if tasksCurrent if dbus reports MaxUint64. | ||
324 | if val != math.MaxUint64 { | ||
325 | unit.tasksCurrent = &val | ||
326 | } | ||
327 | } | ||
328 | |||
329 | tasksMaxCount, err := conn.GetUnitTypeProperty(unit.Name, "Service", "TasksMax") | ||
330 | if err != nil { | ||
331 | log.Debugf("couldn't get unit '%s' TasksMax: %s", unit.Name, err) | ||
332 | } else { | ||
333 | val := tasksMaxCount.Value.Value().(uint64) | ||
334 | // Don't set if tasksMax if dbus reports MaxUint64. | ||
335 | if val != math.MaxUint64 { | ||
336 | unit.tasksMax = &val | ||
337 | } | ||
338 | } | ||
339 | |||
340 | } | ||
341 | |||
342 | if strings.HasSuffix(unit.Name, ".socket") { | ||
343 | acceptedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NAccepted") | ||
344 | if err != nil { | ||
345 | log.Debugf("couldn't get unit '%s' NAccepted: %s", unit.Name, err) | ||
346 | continue | ||
347 | } | ||
348 | |||
349 | unit.acceptedConnections = acceptedConnectionCount.Value.Value().(uint32) | ||
350 | |||
351 | currentConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NConnections") | ||
352 | if err != nil { | ||
353 | log.Debugf("couldn't get unit '%s' NConnections: %s", unit.Name, err) | ||
354 | continue | ||
355 | } | ||
356 | unit.currentConnections = currentConnectionCount.Value.Value().(uint32) | ||
357 | |||
358 | // NRefused wasn't added until systemd 239. | ||
359 | refusedConnectionCount, err := conn.GetUnitTypeProperty(unit.Name, "Socket", "NRefused") | ||
360 | if err != nil { | ||
361 | log.Debugf("couldn't get unit '%s' NRefused: %s", unit.Name, err) | ||
362 | } else { | ||
363 | nRefused := refusedConnectionCount.Value.Value().(uint32) | ||
364 | unit.refusedConnections = &nRefused | ||
365 | } | ||
366 | } | ||
367 | |||
368 | if unit.ActiveState != "active" { | ||
369 | unit.startTimeUsec = 0 | ||
370 | } else { | ||
371 | timestampValue, err := conn.GetUnitProperty(unit.Name, "ActiveEnterTimestamp") | ||
372 | if err != nil { | ||
373 | log.Debugf("couldn't get unit '%s' StartTimeUsec: %s", unit.Name, err) | ||
374 | continue | ||
375 | } | ||
376 | |||
377 | unit.startTimeUsec = timestampValue.Value.Value().(uint64) | ||
378 | } | ||
379 | |||
380 | result = append(result, unit) | 394 | result = append(result, unit) |
381 | } | 395 | } |
382 | 396 | ||
@@ -410,13 +424,3 @@ func filterUnits(units []unit, whitelistPattern, blacklistPattern *regexp.Regexp | |||
410 | 424 | ||
411 | return filtered | 425 | return filtered |
412 | } | 426 | } |
413 | |||
414 | func (c *systemdCollector) getSystemState() (state string, err error) { | ||
415 | conn, err := c.newDbus() | ||
416 | if err != nil { | ||
417 | return "", fmt.Errorf("couldn't get dbus connection: %s", err) | ||
418 | } | ||
419 | state, err = conn.GetManagerProperty("SystemState") | ||
420 | conn.Close() | ||
421 | return state, err | ||
422 | } | ||
diff --git a/collector/systemd_linux_test.go b/collector/systemd_linux_test.go index 26257aa..b2298d6 100644 --- a/collector/systemd_linux_test.go +++ b/collector/systemd_linux_test.go | |||
@@ -18,7 +18,6 @@ import ( | |||
18 | "testing" | 18 | "testing" |
19 | 19 | ||
20 | "github.com/coreos/go-systemd/dbus" | 20 | "github.com/coreos/go-systemd/dbus" |
21 | "github.com/prometheus/client_golang/prometheus" | ||
22 | ) | 21 | ) |
23 | 22 | ||
24 | // Creates mock UnitLists | 23 | // Creates mock UnitLists |
@@ -87,26 +86,6 @@ func getUnitListFixtures() [][]unit { | |||
87 | return [][]unit{fixture1, fixture2} | 86 | return [][]unit{fixture1, fixture2} |
88 | } | 87 | } |
89 | 88 | ||
90 | func TestSystemdCollectorDoesntCrash(t *testing.T) { | ||
91 | c, err := NewSystemdCollector() | ||
92 | if err != nil { | ||
93 | t.Fatal(err) | ||
94 | } | ||
95 | sink := make(chan prometheus.Metric) | ||
96 | go func() { | ||
97 | for { | ||
98 | <-sink | ||
99 | } | ||
100 | }() | ||
101 | |||
102 | fixtures := getUnitListFixtures() | ||
103 | collector := (c).(*systemdCollector) | ||
104 | for _, units := range fixtures { | ||
105 | collector.collectUnitStatusMetrics(sink, units) | ||
106 | collector.collectSockets(sink, units) | ||
107 | } | ||
108 | } | ||
109 | |||
110 | func TestSystemdIgnoreFilter(t *testing.T) { | 89 | func TestSystemdIgnoreFilter(t *testing.T) { |
111 | fixtures := getUnitListFixtures() | 90 | fixtures := getUnitListFixtures() |
112 | whitelistPattern := regexp.MustCompile("^foo$") | 91 | whitelistPattern := regexp.MustCompile("^foo$") |