aboutsummaryrefslogtreecommitdiff
path: root/collector/mdadm_linux.go
diff options
context:
space:
mode:
author: Tobias Schmidt <tobidt@gmail.com> 2017-03-18 14:36:26 -0300
committer: Tobias Schmidt <tobidt@gmail.com> 2017-03-19 08:03:58 -0300
commit: 0400e437be52d622db3050e6496fa53cb0830a28 (patch)
tree: 3964fe7db268875cd217a1467ad9ec1ce52667cf /collector/mdadm_linux.go
parent: faa7483215be62eaee4a3bf548aed6eaf74beabe (diff)
download: prometheus_node_collector-0400e437be52d622db3050e6496fa53cb0830a28.tar.bz2
prometheus_node_collector-0400e437be52d622db3050e6496fa53cb0830a28.tar.xz
prometheus_node_collector-0400e437be52d622db3050e6496fa53cb0830a28.zip
Fix and simplify parsing of raid metrics
Fixes the wrong reporting of active+total disk metrics for inactive raids. Also simplifies the code and removes a couple of redundant comments.
Diffstat (limited to 'collector/mdadm_linux.go')
-rw-r--r-- collector/mdadm_linux.go 124
1 file changed, 46 insertions, 78 deletions
diff --git a/collector/mdadm_linux.go b/collector/mdadm_linux.go
index c2069ff..03f76fd 100644
--- a/collector/mdadm_linux.go
+++ b/collector/mdadm_linux.go
@@ -36,8 +36,8 @@ var (
36) 36)
37 37
38type mdStatus struct { 38type mdStatus struct {
39 mdName string 39 name string
40 isActive bool 40 active bool
41 disksActive int64 41 disksActive int64
42 disksTotal int64 42 disksTotal int64
43 blocksTotal int64 43 blocksTotal int64
@@ -136,97 +136,78 @@ func parseMdstat(mdStatusFilePath string) ([]mdStatus, error) {
136 return []mdStatus{}, fmt.Errorf("error parsing mdstat: %s", err) 136 return []mdStatus{}, fmt.Errorf("error parsing mdstat: %s", err)
137 } 137 }
138 138
139 mdStatusFile := string(content) 139 lines := strings.Split(string(content), "\n")
140
141 lines := strings.Split(mdStatusFile, "\n")
142 var (
143 currentMD string
144 personality string
145 active, total, size int64
146 )
147
148 // Each md has at least the deviceline, statusline and one empty line afterwards 140 // Each md has at least the deviceline, statusline and one empty line afterwards
149 // so we will have probably something of the order len(lines)/3 devices 141 // so we will have probably something of the order len(lines)/3 devices
150 // so we use that for preallocation. 142 // so we use that for preallocation.
151 estimateMDs := len(lines) / 3 143 mdStates := make([]mdStatus, 0, len(lines)/3)
152 mdStates := make([]mdStatus, 0, estimateMDs) 144 for i, line := range lines {
153 145 if line == "" {
154 for i, l := range lines {
155 if l == "" {
156 // Skip entirely empty lines.
157 continue 146 continue
158 } 147 }
159 148 if line[0] == ' ' || line[0] == '\t' {
160 if l[0] == ' ' || l[0] == '\t' { 149 // Lines starting with white space are not the beginning of a md-section.
161 // Those lines are not the beginning of a md-section.
162 continue 150 continue
163 } 151 }
164 152 if strings.HasPrefix(line, "Personalities") || strings.HasPrefix(line, "unused") {
165 if strings.HasPrefix(l, "Personalities") || strings.HasPrefix(l, "unused") { 153 // These lines contain general information.
166 // We aren't interested in lines with general info.
167 continue 154 continue
168 } 155 }
169 156
170 mainLine := strings.Split(l, " ") 157 mainLine := strings.Split(line, " ")
171 if len(mainLine) < 4 { 158 if len(mainLine) < 4 {
172 return mdStates, fmt.Errorf("error parsing mdline: %s", l) 159 return mdStates, fmt.Errorf("error parsing mdline: %s", line)
160 }
161 md := mdStatus{
162 name: mainLine[0],
163 active: mainLine[2] == "active",
173 } 164 }
174 currentMD = mainLine[0] // The name of the md-device. 165
175 isActive := (mainLine[2] == "active") // The activity status of the md-device. 166 if len(lines) <= i+3 {
176 personality = "" 167 return mdStates, fmt.Errorf("error parsing mdstat: entry for %s has fewer lines than expected", md.name)
168 }
169
170 personality := ""
177 for _, possiblePersonality := range mainLine[3:] { 171 for _, possiblePersonality := range mainLine[3:] {
178 if raidPersonalityRE.MatchString(possiblePersonality) { 172 if raidPersonalityRE.MatchString(possiblePersonality) {
179 personality = possiblePersonality 173 personality = possiblePersonality
180 break 174 break
181 } 175 }
182 } 176 }
183
184 if len(lines) <= i+3 {
185 return mdStates, fmt.Errorf("error parsing mdstat: entry for %s has fewer lines than expected", currentMD)
186 }
187
188 switch { 177 switch {
189 case personality == "raid0": 178 case personality == "raid0":
190 active = int64(len(mainLine) - 4) // Get the number of devices from the main line. 179 md.disksActive = int64(len(mainLine) - 4) // Get the number of devices from the main line.
191 total = active // Raid0 active and total is always the same if active. 180 md.disksTotal = md.disksActive // Raid0 active and total is always the same if active.
192 size, err = evalRaid0line(lines[i+1]) // Parse statusline, always present. 181 md.blocksTotal, err = evalRaid0line(lines[i+1])
193 case raidPersonalityRE.MatchString(personality): 182 case raidPersonalityRE.MatchString(personality):
194 active, total, size, err = evalStatusline(lines[i+1]) // Parse statusline, always present. 183 md.disksActive, md.disksTotal, md.blocksTotal, err = evalStatusline(lines[i+1])
195 default: 184 default:
196 log.Infof("Personality unknown: %s\n", mainLine) 185 log.Infof("Personality unknown: %s\n", mainLine)
197 size, err = evalUnknownPersonalitylineRE(lines[i+1]) // Parse statusline, always present. 186 md.blocksTotal, err = evalUnknownPersonalitylineRE(lines[i+1])
198 } 187 }
199
200 if err != nil { 188 if err != nil {
201 return mdStates, fmt.Errorf("error parsing mdstat: %s", err) 189 return mdStates, fmt.Errorf("error parsing mdstat: %s", err)
202 } 190 }
203 191
204 // Now get the number of synced blocks. 192 syncLine := lines[i+2]
205 var syncedBlocks int64 193 if strings.Contains(syncLine, "bitmap") {
206 194 syncLine = lines[i+3]
207 // Get the line number of the syncing-line.
208 var j int
209 if strings.Contains(lines[i+2], "bitmap") { // then skip the bitmap line
210 j = i + 3
211 } else {
212 j = i + 2
213 } 195 }
214 196
215 // If device is syncing at the moment, get the number of currently synced bytes, 197 // If device is syncing at the moment, get the number of currently synced bytes,
216 // otherwise that number equals the size of the device. 198 // otherwise that number equals the size of the device.
217 if strings.Contains(lines[j], "recovery") || 199 if strings.Contains(syncLine, "recovery") ||
218 strings.Contains(lines[j], "resync") && 200 strings.Contains(syncLine, "resync") &&
219 !strings.Contains(lines[j], "\tresync=") { 201 !strings.Contains(syncLine, "\tresync=") {
220 syncedBlocks, err = evalBuildline(lines[j]) 202 md.blocksSynced, err = evalBuildline(syncLine)
221 if err != nil { 203 if err != nil {
222 return mdStates, fmt.Errorf("error parsing mdstat: %s", err) 204 return mdStates, fmt.Errorf("error parsing mdstat: %s", err)
223 } 205 }
224 } else { 206 } else {
225 syncedBlocks = size 207 md.blocksSynced = md.blocksTotal
226 } 208 }
227 209
228 mdStates = append(mdStates, mdStatus{currentMD, isActive, active, total, size, syncedBlocks}) 210 mdStates = append(mdStates, md)
229
230 } 211 }
231 212
232 return mdStates, nil 213 return mdStates, nil
@@ -277,68 +258,55 @@ var (
277func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error { 258func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error {
278 statusfile := procFilePath("mdstat") 259 statusfile := procFilePath("mdstat")
279 if _, err := os.Stat(statusfile); err != nil { 260 if _, err := os.Stat(statusfile); err != nil {
280 // Take care we don't crash on non-existent statusfiles.
281 if os.IsNotExist(err) { 261 if os.IsNotExist(err) {
282 // no such file or directory, nothing to do, just return
283 log.Debugf("Not collecting mdstat, file does not exist: %s", statusfile) 262 log.Debugf("Not collecting mdstat, file does not exist: %s", statusfile)
284 return nil 263 return nil
285 } 264 }
286 return err 265 return err
287 } 266 }
288 267
289 // First parse mdstat-file...
290 mdstate, err := parseMdstat(statusfile) 268 mdstate, err := parseMdstat(statusfile)
291 if err != nil { 269 if err != nil {
292 return fmt.Errorf("error parsing mdstatus: %s", err) 270 return fmt.Errorf("error parsing mdstatus: %s", err)
293 } 271 }
294 272
295 // ... and then plug the result into the metrics to be exported.
296 var isActiveFloat float64
297 for _, mds := range mdstate { 273 for _, mds := range mdstate {
274 log.Debugf("collecting metrics for device %s", mds.name)
298 275
299 log.Debugf("collecting metrics for device %s", mds.mdName) 276 var active float64
300 277 if mds.active {
301 if mds.isActive { 278 active = 1
302 isActiveFloat = 1
303 } else {
304 isActiveFloat = 0
305 } 279 }
306
307 ch <- prometheus.MustNewConstMetric( 280 ch <- prometheus.MustNewConstMetric(
308 isActiveDesc, 281 isActiveDesc,
309 prometheus.GaugeValue, 282 prometheus.GaugeValue,
310 isActiveFloat, 283 active,
311 mds.mdName, 284 mds.name,
312 ) 285 )
313
314 ch <- prometheus.MustNewConstMetric( 286 ch <- prometheus.MustNewConstMetric(
315 disksActiveDesc, 287 disksActiveDesc,
316 prometheus.GaugeValue, 288 prometheus.GaugeValue,
317 float64(mds.disksActive), 289 float64(mds.disksActive),
318 mds.mdName, 290 mds.name,
319 ) 291 )
320
321 ch <- prometheus.MustNewConstMetric( 292 ch <- prometheus.MustNewConstMetric(
322 disksTotalDesc, 293 disksTotalDesc,
323 prometheus.GaugeValue, 294 prometheus.GaugeValue,
324 float64(mds.disksTotal), 295 float64(mds.disksTotal),
325 mds.mdName, 296 mds.name,
326 ) 297 )
327
328 ch <- prometheus.MustNewConstMetric( 298 ch <- prometheus.MustNewConstMetric(
329 blocksTotalDesc, 299 blocksTotalDesc,
330 prometheus.GaugeValue, 300 prometheus.GaugeValue,
331 float64(mds.blocksTotal), 301 float64(mds.blocksTotal),
332 mds.mdName, 302 mds.name,
333 ) 303 )
334
335 ch <- prometheus.MustNewConstMetric( 304 ch <- prometheus.MustNewConstMetric(
336 blocksSyncedDesc, 305 blocksSyncedDesc,
337 prometheus.GaugeValue, 306 prometheus.GaugeValue,
338 float64(mds.blocksSynced), 307 float64(mds.blocksSynced),
339 mds.mdName, 308 mds.name,
340 ) 309 )
341
342 } 310 }
343 311
344 return nil 312 return nil