summaryrefslogtreecommitdiff
path: root/bin/get-ubuntu-mainline-kernel.py
blob: 2005dd39b880179bf282cd489e229f8bb8d23d95 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#!/usr/bin/env python3
"""
Ubuntu Mainline Kernel Downloader
Author: Mike Crute <mike[at]crute[dot]us>

This script parses the Ubuntu mainline kernel build page, determines the most
recent kernel version (skipping rc and suffixed kernels) and downloads the
appropriate (non-realtime) Debian packages for installation.

The architecture defaults to amd64 and can be customized at the bottom of the
script.

After running this, run `sudo dpkg -i *.deb` to install the latest kernel
packages.
"""

import re
import os.path
import logging
import html.parser
import urllib.parse
import urllib.request


class KernelVersion:
    """Kernel Version Parser and Wrapper

    Parses a kernel version from a URL and wraps it in a class that makes
    comparison and sorting easy. Considers major, minor, patch, and rc version
    components but nothing else. Will reject suffixed kernels (-wiley, etc...)
    because it's not easy to know where they fall in the series. This should be
    valid for all kernels 4.5 and newer but breaks for older ones.

    Ordering places release candidates before the final release of the same
    major.minor.patch. Equality and hashing are both based solely on the
    (major, minor, patch, rc) tuple, so equal versions always hash alike.
    """

    # Raw strings so the regex escapes are not interpreted (and warned about)
    # as Python string escapes.
    VERSION_RE = re.compile(
        r"^v(?P<major>[0-9]+)\.(?P<minor>[0-9]+)"
        r"(\.(?P<patch>[0-9]+))?(-rc(?P<rc>[0-9]+))?/$")

    def __init__(self, raw_version, major, minor, patch, rc=None):
        """Store the raw version string and its numeric components.

        Missing (None/empty) components are normalised to 0; an rc of 0 means
        "not a release candidate".
        """
        self._raw_version = raw_version
        self.major = int(major or 0)
        self.minor = int(minor or 0)
        self.patch = int(patch or 0)
        self.rc = int(rc or 0)

    def __repr__(self):
        return (f"{self.__class__.__name__}({self._raw_version!r}, "
                f"{self.major!r}, {self.minor!r}, {self.patch!r}, "
                f"{self.rc!r})")

    def _as_tuple(self):
        """Return the identity of this version used for equality/hashing."""
        return (self.major, self.minor, self.patch, self.rc)

    def _sort_key(self):
        """Return a key that orders rcs before their final release.

        A final release has rc == 0, which would otherwise sort *below*
        rc1, rc2, ...; the boolean component lifts it above every rc.
        """
        return (self.major, self.minor, self.patch, not self.is_rc, self.rc)

    def __lt__(self, other):
        if not isinstance(other, KernelVersion):
            return NotImplemented
        return self._sort_key() < other._sort_key()

    def __gt__(self, other):
        if not isinstance(other, KernelVersion):
            return NotImplemented
        return self._sort_key() > other._sort_key()

    def __eq__(self, other):
        if not isinstance(other, KernelVersion):
            return NotImplemented
        return self._as_tuple() == other._as_tuple()

    def __le__(self, other):
        if not isinstance(other, KernelVersion):
            return NotImplemented
        return self._sort_key() <= other._sort_key()

    def __ge__(self, other):
        if not isinstance(other, KernelVersion):
            return NotImplemented
        return self._sort_key() >= other._sort_key()

    def __hash__(self):
        # Must agree with __eq__, which ignores the raw version string.
        return hash(self._as_tuple())

    @classmethod
    def parse_version(cls, version):
        """Build a KernelVersion from an index href such as ``"v5.10.3/"``.

        Raises:
            ValueError: if the string does not match VERSION_RE (for example
                because it carries a suffix other than ``-rcN``).
        """
        match = cls.VERSION_RE.match(version)
        if not match:
            raise ValueError(f"Unable to parse version {version!r}")

        return cls(
            version,
            int(match["major"]),
            int(match["minor"]),
            int(match["patch"] or 0),
            int(match["rc"] or 0)
        )

    @property
    def is_rc(self):
        """True when this version is a release candidate (rc != 0)."""
        return bool(self.rc)

    def detail_url(self, base_url):
        """Join the raw version string onto base_url and return the result."""
        return urllib.parse.urljoin(base_url, self._raw_version)


class KernelPackage:
    """Kernel Package URL

    Encapsulates a kernel package URL and allow access to the filename and URL.
    Must be hashable because there are duplicate package names on the pages and
    we use a set to deduplicate them.
    """

    def __init__(self, base_url, filename):
        # base_url: URL of the page the link was found on.
        # filename: the link's href, resolved relative to base_url.
        self.base_url = base_url
        self.filename = filename

    def __hash__(self):
        return hash((self.base_url, self.filename))

    def __eq__(self, other):
        if not isinstance(other, KernelPackage):
            return NotImplemented
        return ((self.base_url, self.filename) ==
                (other.base_url, other.filename))

    @property
    def url(self):
        """Absolute URL of the package (filename joined onto base_url)."""
        return urllib.parse.urljoin(self.base_url, self.filename)

    @property
    def local_filename(self):
        """Name of the file to write in the current directory.

        The href may contain directory components (e.g. ``amd64/foo.deb``);
        only the final component is used so the download never depends on,
        or escapes into, another directory.
        """
        return os.path.basename(self.filename)

    def fetch(self):
        """Download the package into the current working directory.

        Raises:
            ValueError: if the href has no file name component.
        """
        target = self.local_filename
        if not target:
            raise ValueError(
                f"Package link {self.filename!r} has no file name")
        urllib.request.urlretrieve(self.url, target)


class HTMLParserFetcher(html.parser.HTMLParser):
    """HTML Fetcher and Parser Base Class

    Aggregates some set of results and returns them after fetching and parsing
    the page. Subclasses populate ``self.results`` from their HTMLParser
    callbacks.
    """

    def __init__(self):
        self.results = set()
        super().__init__()

    def fetch(self, url):
        """Download url, feed it through the parser and return the results.

        The response is closed as soon as the body has been read. The body is
        decoded with the charset declared in the Content-Type header, falling
        back to UTF-8 when none is declared.
        """
        with urllib.request.urlopen(url) as response:
            charset = response.headers.get_content_charset() or "utf-8"
            body = response.read().decode(charset)

        self.feed(body)
        return self.results


class KernelListParser(HTMLParserFetcher):
    """Main Kernel Version List Parser

    Parses the main list of kernel versions (an Apache index page) and contains
    a list of versions extracted from that page.
    """

    def __init__(self, skip_rcs):
        # skip_rcs: when true, release-candidate versions are not collected.
        self.skip_rcs = skip_rcs
        super().__init__()

    def handle_starttag(self, tag, attrs):
        """Collect a KernelVersion for every ``<a href="v...">`` link.

        Links whose href starts with "v" but does not parse as a version are
        logged and skipped.
        """
        if tag != "a":
            return

        url = dict(attrs).get("href")
        # An anchor without an href (or with a valueless one) yields None;
        # ignore it rather than failing on None.startswith.
        if not url or not url.startswith("v"):
            return

        try:
            version = KernelVersion.parse_version(url)
        except ValueError:
            logging.warning("Skipping invalid version %r", url)
            return

        if self.skip_rcs and version.is_rc:
            return

        self.results.add(version)


class KernelDetailParser(HTMLParserFetcher):
    """Kernel Detail Page Parser

    Parses the detail page for a specific kernel version and returns candidate
    packages of the correct architecture that can be downloaded.

    There's a lot of duplication on this page so the results will be
    de-duplicated.
    """

    def __init__(self, base_url, arch):
        self.base_url = base_url
        # arch is matched literally, and the dot before "deb" is escaped so
        # that only a real ".deb" extension qualifies.
        self.pkg_re = re.compile(rf".*_(all|{re.escape(arch)})\.deb$")
        super().__init__()

    def _is_candidate(self, url):
        """Return True for a non-lowlatency .deb link for "all" or our arch."""
        return bool(
            url and self.pkg_re.match(url) and "lowlatency" not in url)

    def handle_starttag(self, tag, attrs):
        """Record a KernelPackage for every candidate ``<a href>`` link."""
        if tag != "a":
            return

        url = dict(attrs).get("href")
        if self._is_candidate(url):
            self.results.add(KernelPackage(self.base_url, url))


class KernelVersionFetcher:
    """Locate and Download the Newest Kernel Packages

    Picks the highest-sorting kernel version listed on the mainline index
    (release candidates are skipped) and saves the matching packages for the
    configured architecture via each package's fetch().
    """

    URL = "https://kernel.ubuntu.com/~kernel-ppa/mainline/"

    def __init__(self, arch):
        self.arch = arch

    def find_latest_version(self, versions):
        """Return the greatest element of versions.

        Raises:
            Exception: if versions is empty.
        """
        ranked = sorted(versions, reverse=True)
        if not ranked:
            # Nothing to choose from: no candidate version was found
            raise Exception("No latest version found")

        return ranked[0]

    def find_packages(self):
        """Return the set of packages listed for the newest version."""
        index_parser = KernelListParser(True)
        newest = self.find_latest_version(index_parser.fetch(self.URL))

        detail_url = newest.detail_url(self.URL)
        detail_parser = KernelDetailParser(detail_url, self.arch)
        return detail_parser.fetch(detail_url)

    def fetch_packages(self):
        """Download every package of the newest version, announcing each."""
        for pkg in self.find_packages():
            print(f"Fetching: {pkg.filename}")
            pkg.fetch()

        # TODO: Check the checksums


if __name__ == "__main__":
    # Change the architecture string here to download for another platform.
    fetcher = KernelVersionFetcher("amd64")
    fetcher.fetch_packages()