aboutsummaryrefslogtreecommitdiff
path: root/text_collector_examples
diff options
context:
space:
mode:
authorSaj Goonatilleke <sg@redu.cx>2019-02-27 11:03:25 +1100
committerBen Kochie <superq@gmail.com>2019-02-27 01:03:25 +0100
commitd546916c6b686bb42fd115f4122ec59938a37137 (patch)
tree0177eb07ebc6b24aa12002bc70c89b8499f03685 /text_collector_examples
parent83c9b117471dba863619886f2d30ff4ef92ff7b4 (diff)
downloadprometheus_node_collector-d546916c6b686bb42fd115f4122ec59938a37137.tar.bz2
prometheus_node_collector-d546916c6b686bb42fd115f4122ec59938a37137.tar.xz
prometheus_node_collector-d546916c6b686bb42fd115f4122ec59938a37137.zip
Add the inotify-instances text collector (#1186)
This is an alternative take on the embedded inotify collector:
https://github.com/prometheus/node_exporter/pull/988

The proposed embedded collector was not accepted for inclusion because a single unprivileged node_exporter process cannot detect inotify resource utilisation in other user domains.

This text collector works around the problem by giving the operator a choice between the following:

- Run only the text collector as root to gain visibility over all processes on the system.
- Run one or more instances of the text collector as an unprivileged user to gain visibility over subsets of the system.

In either case, the data generated by this collector can be useful when hunting down inotify instance leaks -- and when confirming the resolution of such leaks.

Signed-off-by: Saj Goonatilleke <sg@redu.cx>
Diffstat (limited to 'text_collector_examples')
-rwxr-xr-xtext_collector_examples/inotify-instances141
1 files changed, 141 insertions, 0 deletions
diff --git a/text_collector_examples/inotify-instances b/text_collector_examples/inotify-instances
new file mode 100755
index 0000000..ada74d4
--- /dev/null
+++ b/text_collector_examples/inotify-instances
@@ -0,0 +1,141 @@
1#!/usr/bin/env python3
2
3"""
4Expose Linux inotify(7) instance resource consumption.
5
6Operational properties:
7
8 - This script may be invoked as an unprivileged user; in this case, metrics
9 will only be exposed for processes owned by that unprivileged user.
10
11 - No metrics will be exposed for processes that do not hold any inotify fds.
12
13Requires Python 3.5 or later.
14"""
15
16import collections
17import os
18import sys
19
20
class Error(Exception):
    """Base class for every exception defined by this script."""
23
24
class _PIDGoneError(Error):
    """Raised when a process vanishes from /proc while it is being examined."""
27
28
# Immutable record describing one process and how many inotify instances
# it currently holds open.
_Process = collections.namedtuple(
    "Process",
    (
        "pid",                # numeric process ID (the /proc entry name)
        "uid",                # owner UID of the /proc/<pid> directory
        "command",            # basename of argv[0], or "<zombie>"
        "inotify_instances",  # number of fds that point at an inotify inode
    ),
)
31
32
def _read_bytes(name):
    """Return the complete contents of the file *name* as a bytes object."""
    with open(name, "rb") as handle:
        contents = handle.read()
    return contents
36
37
def _pids():
    """Yield, as ints, the all-digit entry names found in /proc.

    Entries whose names are not purely numeric are skipped.
    """
    yield from map(int, filter(str.isdigit, os.listdir("/proc")))
43
44
def _pid_uid(pid):
    """Return the UID that owns the /proc/<pid> directory.

    Raises:
        _PIDGoneError: if /proc/<pid> no longer exists.
    """
    proc_dir = "/proc/{}".format(pid)
    try:
        return os.stat(proc_dir).st_uid
    except FileNotFoundError:
        raise _PIDGoneError()
51
52
def _pid_command(pid):
    """Return the basename of the program that process *pid* was started as.

    The name is taken from the first NUL-terminated entry of
    /proc/<pid>/cmdline.  A process whose cmdline is empty is reported as
    "<zombie>".

    Raises:
        _PIDGoneError: if the process disappears before or while its
            cmdline is read.
    """
    # Avoid GNU ps(1) for it truncates comm.
    # https://bugs.launchpad.net/ubuntu/+source/procps/+bug/295876/comments/3
    try:
        cmdline = _read_bytes("/proc/{}/cmdline".format(pid))
    except (FileNotFoundError, ProcessLookupError) as err:
        # ENOENT: the process was already gone when the file was opened.
        # ESRCH: the process exited after the open but before the read.
        # Both mean the same thing to callers, so report them uniformly.
        raise _PIDGoneError() from err

    if not cmdline:
        return "<zombie>"

    # Arguments are NUL-separated; everything up to the first NUL (or the
    # whole buffer when there is none) is the program path.
    prog = cmdline.partition(b"\x00")[0]

    # surrogateescape keeps non-ASCII bytes representable instead of raising
    # UnicodeDecodeError on unusual program names.
    return os.path.basename(prog).decode(encoding="ascii",
                                         errors="surrogateescape")
70
71
def _pid_inotify_instances(pid):
    """Count the file descriptors of process *pid* that are inotify instances.

    A descriptor counts when its /proc/<pid>/fd/<n> symlink resolves to
    "anon_inode:inotify".  Descriptors that vanish between the directory
    listing and the readlink are ignored.

    Raises:
        _PIDGoneError: if /proc/<pid>/fd no longer exists.
    """
    try:
        fd_names = os.listdir("/proc/{}/fd".format(pid))
    except FileNotFoundError:
        raise _PIDGoneError()

    count = 0
    for fd_name in fd_names:
        try:
            link = os.readlink("/proc/{}/fd/{}".format(pid, fd_name))
        except FileNotFoundError:
            # The descriptor was closed after it was listed; skip it.
            continue
        if link == "anon_inode:inotify":
            count += 1
    return count
85
86
def _get_processes():
    """Yield a _Process record for every process that can be inspected.

    Processes that raise PermissionError or _PIDGoneError while being
    examined are skipped silently.
    """
    for pid in _pids():
        try:
            uid = _pid_uid(pid)
            command = _pid_command(pid)
            instances = _pid_inotify_instances(pid)
        except (PermissionError, _PIDGoneError):
            continue
        yield _Process(pid=pid, uid=uid, command=command,
                       inotify_instances=instances)
94
95
def _get_processes_nontrivial():
    """Yield only the processes that hold at least one inotify instance."""
    for process in _get_processes():
        if process.inotify_instances > 0:
            yield process
98
99
def _format_gauge_metric(metric_name, metric_help, samples,
                         value_func, tags_func=None, stream=sys.stdout):
    """Write one gauge metric family in the Prometheus text exposition format.

    Args:
        metric_name: name of the metric, used on every emitted line.
        metric_help: human-readable description for the "# HELP" line.
        samples: iterable of arbitrary objects, one per sample line.
        value_func: callable mapping a sample to the value to print.
        tags_func: optional callable mapping a sample to an iterable of
            (label_name, label_value) pairs.  When omitted, or when it
            returns nothing, the sample is written without a label set.
        stream: text stream that receives the output.

    Label values and the help text are escaped (backslash, newline and,
    for label values, double quote) so that data taken from the system,
    such as a process name, cannot break the surrounding syntax.
    """

    def _escape_help(text):
        return str(text).replace("\\", "\\\\").replace("\n", "\\n")

    def _escape_label_value(text):
        # Backslash must be handled first so that the backslashes added by
        # the later replacements are not escaped a second time.
        return (str(text)
                .replace("\\", "\\\\")
                .replace("\"", "\\\"")
                .replace("\n", "\\n"))

    print("# HELP {} {}".format(metric_name, _escape_help(metric_help)),
          file=stream)
    print("# TYPE {} gauge".format(metric_name), file=stream)

    # Samples are written one at a time so that a lazily generated iterable
    # is streamed rather than buffered.
    for sample in samples:
        value = value_func(sample)
        tags = tags_func(sample) if tags_func else None

        label_set = ""
        if tags:
            pairs = ",".join(
                "{}=\"{}\"".format(key, _escape_label_value(val))
                for key, val in tags)
            label_set = "{" + pairs + "}"

        print("{}{} {}".format(metric_name, label_set, value), file=stream)
129
130
def main(args_unused=None):
    """Print the inotify_instances gauge for every process that holds one.

    The argument is accepted for call-site convenience and is ignored.
    Each sample is labelled with the process's pid, uid and command.
    """

    def _instance_count(process):
        return process.inotify_instances

    def _labels(process):
        return [
            ("pid", process.pid),
            ("uid", process.uid),
            ("command", process.command),
        ]

    _format_gauge_metric(
        "inotify_instances",
        "Total number of inotify instances held open by a process.",
        _get_processes_nontrivial(),
        _instance_count,
        _labels)
138
139
# Run the collector only when executed as a script, not when imported.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)