summaryrefslogtreecommitdiff
path: root/make_ecs_ami_yaml.py
blob: dda23dae6dce90f935a25d6bd328876488af7249 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python3
#
# Create a YAML mapping, suitable for use in a CloudFormation template, of the
# current ECS-optimized AMIs as published in the AWS documentation. The script
# attempts to be as fault-tolerant as possible while still producing sane
# data.
#
# Assumptions:
# The script looks for a table whose first row (tr) contains a set of header
# cells (th), one with the text 'AMI ID' and another with the text 'Region'.
# Given these it extracts the region/ami-id pairs.
#

import re
import yaml
import requests
from bs4 import BeautifulSoup

# Documentation page that the AMI table is scraped from.
AMI_URL = ("https://docs.aws.amazon.com/AmazonECS/latest/"
           "developerguide/ecs-optimized_AMI.html")

# An AMI ID is "ami-" followed by one or more lowercase hex digits; used to
# sanity-check table cells before they are accepted.
AMI_RE = re.compile(r"^ami-[0-9a-f]+$")

# Fetch and parse the page. The timeout stops the script from hanging forever
# on a stalled connection, and raise_for_status() makes an HTTP error fail
# here instead of parsing an error page and reporting a missing table later.
response = requests.get(AMI_URL, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Find the AMI table and its column indices.
#
# The first table whose first row has both a 'Region' and an 'AMI ID' header
# cell (compared case-insensitively, ignoring surrounding whitespace) wins.
# On success ami_table, id_index and region_index are all set; otherwise an
# Exception is raised.
ami_table, id_index, region_index = None, None, None
for table in soup.find_all("table"):
    # Only the first row is used for identification; a table without any
    # rows cannot be the one we want.
    header_row = table.tr
    if header_row is None:
        continue

    ths = header_row.find_all("th")
    if len(ths) < 2:
        continue

    # Map header text to column position. This is rebuilt for every table so
    # that a column found in one table is never paired with a column from a
    # different table. If a header repeats, the last occurrence wins.
    columns = {th.text.strip().lower(): i for i, th in enumerate(ths)}

    # Membership tests are used because a valid index may be zero
    if "region" in columns and "ami id" in columns:
        ami_table = table
        region_index = columns["region"]
        id_index = columns["ami id"]
        break

# All three values are assigned together, so checking the table is sufficient
if ami_table is None:
    raise Exception("No AMI table found")

# Build the AMI map: region name -> {"AMI": ami-id}
ami_map = {}

# A row needs at least this many cells for both column indices to be valid
min_cells = max(id_index, region_index) + 1

for row in ami_table.find_all("tr"):
    # Header (th) rows have no td cells and are skipped here, as is any row
    # too short to contain both columns
    tds = row.find_all("td")
    if len(tds) < min_cells:
        continue

    # Strip surrounding whitespace so padded cells still validate and no
    # stray newlines end up in the output
    region = tds[region_index].text.strip()
    ami_id = tds[id_index].text.strip()

    # Sanity check the AMI ID in case we mis-identified a row
    if not AMI_RE.match(ami_id):
        continue

    ami_map[region] = {"AMI": ami_id}

# Validate that there are at least enough AMIs (min 4 because there are
# currently 4 big US regions)
if len(ami_map) < 4:
    raise Exception("Not enough AMIs found")

# Emit the region -> AMI mapping on stdout as a block-style YAML document
# with an explicit document start marker, nested under "ECSRegionImages".
document = {"ECSRegionImages": ami_map}
rendered = yaml.dump(document, default_flow_style=False, explicit_start=True)
print(rendered)