diff options
author | root <root@ip-172-31-44-20.us-west-2.compute.internal> | 2015-12-13 18:00:56 -0800 |
---|---|---|
committer | root <root@ip-172-31-44-20.us-west-2.compute.internal> | 2015-12-13 18:00:56 -0800 |
commit | be794d825cff203f3ca943eba7b2e37de9b60b56 (patch) | |
tree | 798d864bf82257db923574c5ebb68bf5056649e4 /backup | |
download | server_bin-be794d825cff203f3ca943eba7b2e37de9b60b56.tar.bz2 server_bin-be794d825cff203f3ca943eba7b2e37de9b60b56.tar.xz server_bin-be794d825cff203f3ca943eba7b2e37de9b60b56.zip |
Initial import
Diffstat (limited to 'backup')
-rwxr-xr-x | backup | 207 |
1 files changed, 207 insertions, 0 deletions
@@ -0,0 +1,207 @@ | |||
1 | #!/usr/bin/env python | ||
2 | |||
3 | import os | ||
4 | import sys | ||
5 | import math | ||
6 | import logging | ||
7 | import subprocess | ||
8 | from glob import glob | ||
9 | from tempfile import mkstemp | ||
10 | from datetime import datetime | ||
11 | from multiprocessing import Pool | ||
12 | from collections import namedtuple | ||
13 | from boto.s3.connection import S3Connection | ||
14 | from ConfigParser import SafeConfigParser | ||
15 | |||
# Root logging config: everything at DEBUG and above, tagged with the
# logger name and level so multi-step backups are easy to follow.
logging.basicConfig(level=logging.DEBUG,
                    format="%(asctime)s %(name)s[%(levelname)s]: %(message)s")
logger = logging.getLogger("backups")

# Boto is very noisy
logging.getLogger('boto').setLevel(logging.ERROR)

# Credentials plus bucket name, passed around as one value so worker
# processes can re-open their own S3 connections.
BucketId = namedtuple('BucketId', ('key', 'secret', 'name'))
# 5 * 1024 * 1024 — S3 multipart chunk size; parts are uploaded in
# FIVE_MB slices of the archive file.
FIVE_MB = 5242880
# Timestamp embedded in backup key names, e.g. "name-20151213180056".
DATE_FORMAT = "%Y%m%d%H%M%S"
# Size of the multiprocessing pool used for parallel part uploads.
UPLOAD_PROCESSES = 8
27 | |||
28 | |||
def get_bucket(bucketid):
    """Open a fresh S3 connection for *bucketid* and return its bucket.

    A new connection per call keeps this safe to use from pool workers.
    """
    return S3Connection(bucketid.key, bucketid.secret).get_bucket(bucketid.name)
32 | |||
33 | |||
def get_upload(bucket, multipart_id):
    """Return the in-progress multipart upload with id *multipart_id*.

    Falls back to None when the bucket has no matching upload.
    """
    matches = (upload for upload in bucket.get_all_multipart_uploads()
               if upload.id == multipart_id)
    return next(matches, None)
38 | |||
39 | |||
def _upload_part(bucketid, multipart_id, part_num, source_path, part_size):
    """Upload one FIVE_MB-aligned slice of *source_path* (pool worker).

    Re-opens the bucket and looks up the multipart upload by id because
    this runs in a separate process.  Retries up to 5 times; returns True
    on success, False once the retries are exhausted.
    """
    amount_of_retries = 5

    while amount_of_retries > 0:
        try:
            mp = get_upload(get_bucket(bucketid), multipart_id)

            # "rb": the archive is encrypted binary data, so never let a
            # text-mode layer touch the bytes.
            with open(source_path, "rb") as fp:
                fp.seek((part_num - 1) * FIVE_MB)
                mp.upload_part_from_file(fp=fp, part_num=part_num,
                                         size=part_size)

            return True
        except Exception:
            amount_of_retries -= 1
            logger.warn("Upload part %s of %r failed, %s retries remaining",
                        part_num, source_path, amount_of_retries)

    return False
58 | |||
59 | |||
def upload(bucketid, keyname, source_path):
    """Multipart-upload *source_path* to S3 under *keyname*.

    The file is carved into 5 MB slices which a process pool pushes in
    parallel via _upload_part.  Returns 0 when every part succeeded (the
    upload is completed and the key made private), 1 after cancelling a
    failed upload.
    """
    bucket = get_bucket(bucketid)
    mp = bucket.initiate_multipart_upload(keyname)
    pool = Pool(processes=UPLOAD_PROCESSES)

    async_results = []

    part_num = 0
    bytes_left = os.stat(source_path).st_size
    chunk_count = int(math.ceil(bytes_left / float(FIVE_MB)))
    logger.debug("Uploading in %d chunks", chunk_count)

    while bytes_left > 0:
        part_num += 1
        # Every part is a full FIVE_MB except possibly the last one.
        chunk = FIVE_MB if bytes_left > FIVE_MB else bytes_left
        bytes_left -= chunk
        async_results.append(pool.apply_async(
            _upload_part, [bucketid, mp.id, part_num, source_path, chunk]))

    pool.close()
    pool.join()

    # The pool is already joined, so the 1-second get() timeout is only a
    # safety net; each worker returned True/False.
    if all([res.get(1) for res in async_results]):
        logger.info("Upload succeeded")
        mp.complete_upload()
        bucket.get_key(keyname).set_acl('private')
        return 0

    logger.error("Upload failed, removing parts")
    mp.cancel_upload()
    return 1
90 | |||
91 | |||
class ConfigParser(SafeConfigParser):
    """SafeConfigParser bound to a single backup config file.

    The file's first section is treated as the "primary" section and its
    options are exposed as plain attributes (e.g. cfg.bucket,
    cfg.encryption_key), with typed accessors for the non-string options.
    """

    def __init__(self, filename):
        SafeConfigParser.__init__(self)

        with open(filename, "r") as fp:
            self.readfp(fp)

        # All attribute-style lookups below are served from this section.
        self.primary_section = self.sections()[0]

    def __getattr__(self, key):
        # Only reached for names not found through normal lookup.  Pick
        # the typed getter for the two non-string options; everything
        # else is returned as a raw string.
        if key == "mark_caches":
            getter = self.getboolean
        elif key == "keep":
            getter = self.getint
        else:
            getter = self.get
        return getter(self.primary_section, key)

    @property
    def path(self):
        # The section name doubles as the filesystem path to back up.
        return self.primary_section

    @property
    def bucket_id(self):
        """Credentials and bucket name bundled for the S3 helpers."""
        return BucketId(self.access_key, self.secret_key, self.bucket)
117 | |||
118 | |||
def get_file_date(filename):
    """Return the datetime encoded in *filename*'s trailing timestamp.

    Backup keys are named "<name>-<YYYYmmddHHMMSS>" (see do_backup), so
    everything after the last hyphen is the stamp.  Uses the module-wide
    DATE_FORMAT instead of repeating the format string, keeping parsing
    in sync with how do_backup generates the names.
    """
    return datetime.strptime(filename.split("-")[-1], DATE_FORMAT)
121 | |||
122 | |||
def backup_comparator(lhs, rhs):
    """cmp-style comparator ordering backup keys newest-first.

    Returns a positive number when *lhs* should sort after *rhs*
    (i.e. when lhs is the older backup), negative when before, 0 on ties.
    """
    lhs_date = get_file_date(lhs.name)
    rhs_date = get_file_date(rhs.name)
    if rhs_date > lhs_date:
        return 1
    if rhs_date < lhs_date:
        return -1
    return 0
125 | |||
126 | |||
def trim_backups(bucket_id, prefix, max_items=3):
    """Keep only the newest *max_items* backups under *prefix*.

    Lists every key with the prefix, sorts newest-first, and deletes the
    tail of the list.
    """
    backups = list(get_bucket(bucket_id).list(prefix))
    backups.sort(backup_comparator)  # newest first

    # Everything beyond the first max_items entries is expendable.
    for stale in backups[max_items:]:
        logger.info("Pruning backup %s", stale.name)
        stale.delete()
134 | |||
135 | |||
def mark_caches(path):
    """Drop a CACHEDIR.TAG file into every directory named 'cache' under
    *path*, so tar's --exclude-caches flag will skip those directories.

    The signature line is the standard one from the Cache Directory
    Tagging specification.
    """
    # Build the command as an argument list with shell=False so a *path*
    # containing spaces, quotes, or shell metacharacters cannot be
    # interpreted by a shell (the old shell=True + .format() version was
    # injectable).  find substitutes each match into the "{}" argument,
    # which the -exec'd sh receives as $0.
    cmd = ["find", path, "-type", "d", "-name", "cache",
           "-exec", "sh", "-c",
           'echo "Signature: 8a477f597d28d172789f06886806bc55"'
           ' > "$0/CACHEDIR.TAG"',
           "{}", ";"]
    subprocess.call(cmd)
142 | |||
143 | |||
def tar_encrypt(now, key, path, tempdir="/srv/tmp"):
    """Tar up *path* (honouring CACHEDIR.TAG markers) and symmetrically
    encrypt the stream with gpg.

    The passphrase is *key* concatenated with *now* and is written to a
    temporary file that is always removed before returning.  Returns the
    path of the encrypted archive; the caller owns it and must delete it
    (do_backup unlinks it after uploading).
    """
    # NOTE(review): 0o200 clears the owner-write bit, so the mkstemp
    # files end up mode 0400; writing to them afterwards appears to rely
    # on running as root — confirm intent.
    old_umask = os.umask(0o200)

    key_fd, key_path = mkstemp(dir=tempdir)
    archive_fd, archive_path = mkstemp(dir=tempdir)

    logger.debug("Key file %s", key_path)
    logger.debug("Archive file %s", archive_path)

    tar_cmd = ("tar", "-c", "-C", path, "--exclude-caches", ".")
    gpg_cmd = ("gpg", "-c", "-q", "--no-use-agent", "--batch", "--yes",
               "--s2k-count", "1024", "--cipher-algo", "AES256",
               "--digest-algo", "SHA512", "--passphrase-file",
               key_path, "-o", archive_path)

    try:
        os.write(key_fd, "{}{}".format(key, now))

        tar = subprocess.Popen(tar_cmd, stdout=subprocess.PIPE)
        gpg = subprocess.Popen(gpg_cmd, stdin=tar.stdout)
        # Close our copy of the pipe's read end so tar gets EPIPE if gpg
        # exits early instead of blocking forever.
        tar.stdout.close()
        gpg.communicate()
        tar.wait()  # reap tar; otherwise it lingers as a zombie
        # NOTE(review): tar/gpg exit codes are not checked (matching the
        # original behaviour); a failure surfaces later as a bad archive.
    finally:
        # Close both mkstemp descriptors (the original leaked them).
        os.close(key_fd)
        os.close(archive_fd)
        os.unlink(key_path)
        os.umask(old_umask)

    return archive_path
170 | |||
171 | |||
def do_backup(cfg_file):
    """Run the full backup cycle described by one config file.

    Steps: optionally tag cache dirs, build an encrypted archive, upload
    it to S3 under a timestamped key, delete the local archive, and prune
    old backups down to the configured retention count.
    """
    config = ConfigParser(cfg_file)
    timestamp = datetime.now().strftime(DATE_FORMAT)

    logger.info("Starting backup for %s", config.primary_section)

    # CACHEDIR.TAG markers let tar's --exclude-caches skip these dirs.
    if config.mark_caches:
        logger.info("Marking caches")
        mark_caches(config.path)

    logger.info("Creating archive")
    archive_path = tar_encrypt(timestamp, config.encryption_key, config.path)
    logger.info("Finished creating archive")

    try:
        logger.info("Uploading archive")
        upload(config.bucket_id,
               "{}-{}".format(config.filename, timestamp), archive_path)
        logger.info("Finished uploading archive")
    finally:
        # The local archive is large; never leave it behind.
        os.unlink(archive_path)

    logger.info("Trimming backups")
    trim_backups(config.bucket_id, "{}-".format(config.filename), config.keep)
    logger.info("Finished trimming backups")

    logger.info("Backup for %s finished", config.primary_section)
199 | |||
200 | |||
def do_all_backups(pattern):
    """Back up every target whose config file matches the glob *pattern*."""
    for cfg_path in glob(pattern):
        do_backup(cfg_path)
204 | |||
205 | |||
# Script entry point: run a backup for every config file in the standard
# configuration directory.
if __name__ == "__main__":
    do_all_backups("/srv/etc/backups/*.conf")