author    root <root@ip-172-31-44-20.us-west-2.compute.internal>  2015-12-13 18:00:56 -0800
committer root <root@ip-172-31-44-20.us-west-2.compute.internal>  2015-12-13 18:00:56 -0800
commit    be794d825cff203f3ca943eba7b2e37de9b60b56 (patch)
tree      798d864bf82257db923574c5ebb68bf5056649e4 /backup
Initial import
Diffstat (limited to 'backup')
-rwxr-xr-x  backup  207
1 file changed, 207 insertions, 0 deletions
diff --git a/backup b/backup
new file mode 100755
index 0000000..8d0bd5d
--- /dev/null
+++ b/backup
@@ -0,0 +1,207 @@
#!/usr/bin/env python
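"""Encrypted backups to S3.

For each config file matching /srv/etc/backups/*.conf: optionally mark
cache directories so tar skips them, build a gpg-encrypted tarball of
the configured path, upload it to S3 in parallel multipart chunks, and
prune all but the newest backups.
"""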

import os
import math
import logging
import subprocess
from glob import glob
from tempfile import mkstemp
from datetime import datetime
from multiprocessing import Pool
from collections import namedtuple
from boto.s3.connection import S3Connection
from ConfigParser import SafeConfigParser

logging.basicConfig(level=logging.DEBUG,
                    format="%(asctime)s %(name)s[%(levelname)s]: %(message)s")
logger = logging.getLogger("backups")

# Boto is very noisy
logging.getLogger('boto').setLevel(logging.ERROR)

BucketId = namedtuple('BucketId', ('key', 'secret', 'name'))
FIVE_MB = 5 * 1024 * 1024  # S3's minimum multipart part size
DATE_FORMAT = "%Y%m%d%H%M%S"
UPLOAD_PROCESSES = 8


def get_bucket(bucketid):
    conn = S3Connection(bucketid.key, bucketid.secret)
    return conn.get_bucket(bucketid.name)


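# Re-fetch an in-progress multipart upload by id; returns None if it
# no longer exists.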
def get_upload(bucket, multipart_id):
    for mp in bucket.get_all_multipart_uploads():
        if mp.id == multipart_id:
            return mp


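# Runs in a pool worker process, so it opens its own S3 connection
# rather than sharing one across processes; each part gets up to five
# attempts before the part (and hence the whole upload) is failed.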
def _upload_part(bucketid, multipart_id, part_num, source_path, size):
    amount_of_retries = 5

    while amount_of_retries > 0:
        try:
            mp = get_upload(get_bucket(bucketid), multipart_id)

            with open(source_path, "rb") as fp:
                fp.seek((part_num - 1) * FIVE_MB)
                mp.upload_part_from_file(fp=fp, part_num=part_num, size=size)

            return True
        except Exception as exc:
            amount_of_retries -= 1
            logger.warning("Upload part %s of %r failed (%s), "
                           "%s retries remaining",
                           part_num, source_path, exc, amount_of_retries)

    return False


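# Split the archive into 5 MB parts and upload them through a process
# pool; complete the multipart upload only if every part succeeded,
# otherwise cancel it so S3 discards the orphaned parts.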
def upload(bucketid, keyname, source_path):
    bucket = get_bucket(bucketid)
    mp = bucket.initiate_multipart_upload(keyname)
    pool = Pool(processes=UPLOAD_PROCESSES)

    results = []

    i = 0
    remaining = os.stat(source_path).st_size
    total = int(math.ceil(remaining / float(FIVE_MB)))
    logger.debug("Uploading in %d chunks", total)
    while remaining > 0:
        i += 1
        chunk_size = min(FIVE_MB, remaining)
        remaining -= chunk_size
        results.append(pool.apply_async(
            _upload_part, [bucketid, mp.id, i, source_path, chunk_size]))

    pool.close()
    pool.join()

    if all(result.get() for result in results):
        logger.info("Upload succeeded")
        mp.complete_upload()
        bucket.get_key(keyname).set_acl('private')
        return 0
    else:
        logger.error("Upload failed, removing parts")
        mp.cancel_upload()
        return 1


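# One INI file per backup target. The first section's name doubles as
# the path to back up; every other option is exposed as an attribute,
# with mark_caches and keep parsed as bool and int respectively.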
class ConfigParser(SafeConfigParser):

    def __init__(self, filename):
        SafeConfigParser.__init__(self)

        with open(filename, "r") as fp:
            self.readfp(fp)

        self.primary_section = self.sections()[0]

    def __getattr__(self, key):
        if key == "mark_caches":
            return self.getboolean(self.primary_section, key)
        elif key == "keep":
            return self.getint(self.primary_section, key)
        else:
            return self.get(self.primary_section, key)

    @property
    def path(self):
        return self.primary_section

    @property
    def bucket_id(self):
        return BucketId(self.access_key, self.secret_key, self.bucket)


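# A config file (one per backup target) might look like the sketch
# below -- the section name doubles as the path to archive; all values
# are illustrative placeholders, not taken from a real deployment:
#
#   [/srv/data]
#   access_key = AKIAEXAMPLE
#   secret_key = examplesecret
#   bucket = example-backups
#   filename = data
#   encryption_key = examplepassphrase
#   mark_caches = true
#   keep = 3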
def get_file_date(filename):
    return datetime.strptime(filename.split("-")[-1], DATE_FORMAT)


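# Newest first: rhs and lhs are swapped to reverse the sort order.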
def backup_comparator(lhs, rhs):
    return cmp(get_file_date(rhs.name), get_file_date(lhs.name))


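# Keep the newest max_items backups under the given key prefix and
# delete the rest.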
def trim_backups(bucket_id, prefix, max_items=3):
    items = list(get_bucket(bucket_id).list(prefix))
    items.sort(backup_comparator)

    for item in items[max_items:]:
        logger.info("Pruning backup %s", item.name)
        item.delete()


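# Stamp each 'cache' directory with a CACHEDIR.TAG file (the standard
# signature from the Cache Directory Tagging spec) so tar's
# --exclude-caches flag skips its contents.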
def mark_caches(path):
    cmd = ("find '{path}' -type d -name 'cache' "
           "-exec bash -c 'echo "
           "\"Signature: 8a477f597d28d172789f06886806bc55\""
           " > \"{{}}/CACHEDIR.TAG\"' \\;")
    subprocess.call(cmd.format(path=path), shell=True)


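# Stream "tar | gpg -c" to produce a symmetrically encrypted archive of
# path. The passphrase is the configured key concatenated with the run
# timestamp, written to a short-lived temp file; returns the path to
# the encrypted archive, which the caller must delete.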
def tar_encrypt(now, key, path, tempdir="/srv/tmp"):
    # Clear the owner-write bit on files created below
    old_umask = os.umask(0o200)

    key_file = mkstemp(dir=tempdir)
    archive_file = mkstemp(dir=tempdir)

    logger.debug("Key file %s", key_file[1])
    logger.debug("Archive file %s", archive_file[1])

    tar_cmd = ("tar", "-c", "-C", path, "--exclude-caches", ".")
    gpg_cmd = ("gpg", "-c", "-q", "--no-use-agent", "--batch", "--yes",
               "--s2k-count", "1024", "--cipher-algo", "AES256",
               "--digest-algo", "SHA512", "--passphrase-file",
               key_file[1], "-o", archive_file[1])

    try:
        os.write(key_file[0], "{}{}".format(key, now))

        tar = subprocess.Popen(tar_cmd, stdout=subprocess.PIPE)
        gpg = subprocess.Popen(gpg_cmd, stdin=tar.stdout)
        tar.stdout.close()  # so tar gets SIGPIPE if gpg exits early
        gpg.communicate()
    finally:
        os.close(key_file[0])
        os.close(archive_file[0])
        os.unlink(key_file[1])
        os.umask(old_umask)

    return archive_file[1]


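# One end-to-end backup for a single config file: mark caches, build
# the encrypted archive, upload it, then prune old copies.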
def do_backup(cfg_file):
    cfg = ConfigParser(cfg_file)
    now = datetime.now().strftime(DATE_FORMAT)

    logger.info("Starting backup for %s", cfg.primary_section)

    if cfg.mark_caches:
        logger.info("Marking caches")
        mark_caches(cfg.path)

    logger.info("Creating archive")
    archive_file = tar_encrypt(now, cfg.encryption_key, cfg.path)
    logger.info("Finished creating archive")

    try:
        logger.info("Uploading archive")
        upload(cfg.bucket_id,
               "{}-{}".format(cfg.filename, now), archive_file)
        logger.info("Finished uploading archive")
    finally:
        os.unlink(archive_file)

    logger.info("Trimming backups")
    trim_backups(cfg.bucket_id, "{}-".format(cfg.filename), cfg.keep)
    logger.info("Finished trimming backups")

    logger.info("Backup for %s finished", cfg.primary_section)


def do_all_backups(pattern):
    for config_file in glob(pattern):
        do_backup(config_file)


if __name__ == "__main__":
    do_all_backups("/srv/etc/backups/*.conf")