aboutsummaryrefslogtreecommitdiff
path: root/text_collector_examples
diff options
context:
space:
mode:
authorMatt Bostock <matt@mattbostock.com>2018-05-25 17:20:42 +0100
committerJohannes 'fish' Ziemke <github@freigeist.org>2018-05-25 18:20:42 +0200
commit516e5d4beb11565246890346b25f4d38831d6987 (patch)
tree61dc771933bf4b7708b8c6bf69ebdce3792e25cd /text_collector_examples
parent606568314befb8127ce48de29ecb647bd9d8507a (diff)
downloadprometheus_node_collector-516e5d4beb11565246890346b25f4d38831d6987.tar.bz2
prometheus_node_collector-516e5d4beb11565246890346b25f4d38831d6987.tar.xz
prometheus_node_collector-516e5d4beb11565246890346b25f4d38831d6987.zip
Add metric for outdated libraries (#957)
Add metrics that count how many running processes are linking to deleted libraries on each machine. Deleted libraries are usually outdated libraries, and outdated libraries may have known security vulnerabilities. The rationale behind storing these as metrics is to allow the rollout of security fixes to be tracked across a fleet of machines, ensuring that all affected processes are restarted (e.g. via a reboot). I'm parsing the output from `/proc/*/maps` because using `lsof -d DEL` can be too slow, particularly if you have sockets that bind to thousands of IP addresses. The metric labels include the library path and the base filename, which allows us to pinpoint the exact path of the deleted library but also allows us to aggregate on the library name (or approximations of it) even if library locations differ between operating system versions. The metrics output and the CPU time consumed are as follows: user@host:~$ time sudo python processes.py # HELP node_processes_linking_deleted_libraries Count of running processes that link a deleted library # TYPE node_processes_linking_deleted_libraries gauge node_processes_linking_deleted_libraries{library_path="locale-archive", library_name="/usr/lib/locale"} 3 node_processes_linking_deleted_libraries{library_path="libevent-2.0.so.5.1.9", library_name="/usr/lib/x86_64-linux-gnu"} 4 real 0m0.071s user 0m0.030s sys 0m0.041s Including the library filename and path will result in reasonably high metrics cardinality, however I think the benefits when an urgent security patch is being deployed outweigh concerns around cardinality. This script assumes that library files do not contain spaces in their path. Signed-off-by: Matt Bostock <mbostock@cloudflare.com>
Diffstat (limited to 'text_collector_examples')
-rwxr-xr-xtext_collector_examples/deleted_libraries.py70
1 files changed, 70 insertions, 0 deletions
diff --git a/text_collector_examples/deleted_libraries.py b/text_collector_examples/deleted_libraries.py
new file mode 100755
index 0000000..b6d5809
--- /dev/null
+++ b/text_collector_examples/deleted_libraries.py
@@ -0,0 +1,70 @@
1#!/usr/bin/env python
2"""
3Script to count the number of deleted libraries that are linked by running
4processes and expose a summary as Prometheus metrics.
5
6The aim is to discover processes that are still using libraries that have since
7been updated, perhaps due to security vulnerabilities.
8"""
9
10import errno
11import glob
12import os
13import sys
14
15
def _escape_label_value(value):
    """Escape a string for use as a Prometheus text-format label value.

    Backslash is escaped first so that the backslashes introduced by the
    quote and newline escapes are not themselves doubled afterwards.
    """
    return (value.replace('\\', '\\\\')
                 .replace('"', '\\"')
                 .replace('\n', '\\n'))


def _deleted_libraries(maps_path):
    """Return the set of deleted library paths mapped in one maps file.

    Each line is split into at most six fields, so the final field is the
    whole pathname column even when the path contains spaces. A mapping is
    reported when that pathname ends in ' (deleted)' and the remaining path
    contains '/lib/'.

    Raises EnvironmentError if the file cannot be opened or read.
    """
    deleted_suffix = ' (deleted)'
    libraries = set()

    # Read bytes and decode leniently: paths are arbitrary byte strings and
    # an undecodable one must not abort the whole collection run.
    with open(maps_path, 'rb') as maps_file:
        for raw_line in maps_file:
            fields = raw_line.decode('utf-8', 'replace').strip().split(None, 5)
            if len(fields) < 6:
                # No pathname column on this line.
                continue

            pathname = fields[5]
            if not pathname.endswith(deleted_suffix):
                continue

            library = pathname[:-len(deleted_suffix)]
            if '/lib/' in library:
                libraries.add(library)

    return libraries


def main(maps_glob='/proc/*/maps'):
    """Print a gauge counting processes that map each deleted library.

    Every file matching ``maps_glob`` is treated as the memory map listing
    of one process. A process contributes at most one to the count of a
    given library, however many of its mappings refer to that library.

    The output is the '# HELP' and '# TYPE' header lines followed by one
    sample per library (sorted by library path), labelled with the
    library's directory (``library_path``) and file name (``library_name``).

    Args:
        maps_glob: Glob pattern selecting the maps files to scan.

    Raises:
        SystemExit: If a maps file cannot be read for a reason other than
            the file or its process having disappeared.
    """
    num_processes_per_library = {}

    for path in glob.glob(maps_glob):
        try:
            libraries = _deleted_libraries(path)
        except EnvironmentError as e:
            # Ignore files that vanished since we globbed (ENOENT) and
            # processes that exited while being read (ESRCH); anything else
            # (e.g. lack of permission) is a real failure.
            if e.errno not in (errno.ENOENT, errno.ESRCH):
                sys.exit('Failed to open file: {0}'.format(path))
            continue

        for library in libraries:
            num_processes_per_library[library] = (
                num_processes_per_library.get(library, 0) + 1)

    metric_name = 'node_processes_linking_deleted_libraries'
    description = 'Count of running processes that link a deleted library'
    print('# HELP {0} {1}'.format(metric_name, description))
    print('# TYPE {0} gauge'.format(metric_name))

    # Sorted so that successive runs produce output in a stable order.
    for library, count in sorted(num_processes_per_library.items()):
        dir_path, basename = os.path.split(library)
        print('{0}{{library_path="{1}", library_name="{2}"}} {3}'.format(
            metric_name,
            _escape_label_value(dir_path),
            _escape_label_value(basename),
            count))
67
68
# Collect and print the metrics only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()