#!/usr/bin/env python3
"""
Ubuntu Mainline Kernel Downloader
Author: Mike Crute

This script parses the Ubuntu mainline kernel build page, determines the most
recent kernel version (skipping rc and suffixed kernels) and downloads the
appropriate (non-realtime) Debian packages for installation. The architecture
defaults to amd64 and can be customized at the bottom of the script.

After running this, run `sudo dpkg -i *.deb` to install the latest kernel
packages.
"""

import functools
import html.parser
import logging
import os.path
import re
import urllib.parse
import urllib.request


@functools.total_ordering
class KernelVersion:
    """Kernel Version Parser and Wrapper

    Parses a kernel version from a URL and wraps it in a class that makes
    comparison and sorting easy. Considers major, minor, patch, and rc version
    components but nothing else. Will reject suffixed kernels (-wiley, etc...)
    because it's not easy to know where they fall in the series.

    This should be valid for all kernels 4.5 and newer but breaks for older
    ones.
    """

    # Matches index hrefs such as "v5.10/", "v5.10.3/" and "v5.11-rc2/".
    VERSION_RE = re.compile(
        r"^v(?P<major>[0-9]+)\.(?P<minor>[0-9]+)"
        r"(\.(?P<patch>[0-9]+))?(-rc(?P<rc>[0-9]+))?/$")

    def __init__(self, raw_version, major, minor, patch, rc=None):
        """Store the raw href and the numeric components.

        Missing components (None or empty) are stored as 0; an rc of 0 means
        "not a release candidate".
        """
        self._raw_version = raw_version
        self.major = int(major or 0)
        self.minor = int(minor or 0)
        self.patch = int(patch or 0)
        self.rc = int(rc or 0)

    def __repr__(self):
        return (f"{self.__class__.__name__}({self._raw_version!r}, "
                f"{self.major!r}, {self.minor!r}, {self.patch!r}, "
                f"{self.rc!r})")

    def _as_tuple(self):
        """Return the components that define equality and hashing."""
        return (self.major, self.minor, self.patch, self.rc)

    def _sort_key(self):
        """Return the ordering key.

        A final release has rc == 0 but must sort *after* all of its release
        candidates, so a flag that is 0 for rcs and 1 for finals is placed
        ahead of the rc number.
        """
        return (self.major, self.minor, self.patch,
                0 if self.is_rc else 1, self.rc)

    def __eq__(self, other):
        if not isinstance(other, KernelVersion):
            return NotImplemented
        return self._as_tuple() == other._as_tuple()

    def __lt__(self, other):
        if not isinstance(other, KernelVersion):
            return NotImplemented
        return self._sort_key() < other._sort_key()

    def __hash__(self):
        # Hash only what __eq__ compares so equal versions (for example
        # "v5.1/" and "v5.1.0/") land in the same set/dict bucket.
        return hash(self._as_tuple())

    @classmethod
    def parse_version(cls, version):
        """Build a KernelVersion from an index href like "v5.10.3/".

        Raises:
            ValueError: if the href is not a plain or -rc version.
        """
        match = cls.VERSION_RE.match(version)
        if not match:
            raise ValueError(f"Unable to parse version {version!r}")

        return cls(
            version,
            match["major"],
            match["minor"],
            match["patch"],
            match["rc"],
        )

    @property
    def is_rc(self):
        """True when this version is a release candidate."""
        return bool(self.rc)

    def detail_url(self, base_url):
        """Return the URL of this version's detail page under base_url."""
        return urllib.parse.urljoin(base_url, self._raw_version)


class KernelPackage:
    """Kernel Package URL

    Encapsulates a kernel package URL and allow access to the filename and
    URL. Must be hashable because there are duplicate package names on the
    pages and we use a set to deduplicate them.
    """

    def __init__(self, base_url, filename):
        self.base_url = base_url
        self.filename = filename

    def __hash__(self):
        return hash((self.base_url, self.filename))

    def __eq__(self, other):
        if not isinstance(other, KernelPackage):
            return NotImplemented
        return ((self.base_url, self.filename) ==
                (other.base_url, other.filename))

    @property
    def url(self):
        """Absolute download URL for the package."""
        return urllib.parse.urljoin(self.base_url, self.filename)

    @property
    def local_filename(self):
        """Name of the file written to the current directory.

        The href may carry a directory prefix (e.g. "amd64/foo.deb"); only
        the final path component is used so the download never depends on, or
        escapes into, another directory.
        """
        return os.path.basename(self.filename) or self.filename

    def fetch(self):
        """Download the package into the current working directory."""
        urllib.request.urlretrieve(self.url, self.local_filename)


class HTMLParserFetcher(html.parser.HTMLParser):
    """HTML Fetcher and Parser Base Class

    Aggregates some set of results and returns them after fetching and parsing
    the page.
    """

    def __init__(self):
        self.results = set()
        super().__init__()

    def fetch(self, url):
        """Download url, feed it through the parser, and return the results.

        The body is decoded as UTF-8. Results accumulate on the instance, so
        repeated calls return the union of everything parsed so far.
        """
        # Close the HTTP response deterministically instead of leaking it.
        with urllib.request.urlopen(url) as response:
            body = response.read().decode("utf-8")

        self.feed(body)
        return self.results


class KernelListParser(HTMLParserFetcher):
    """Main Kernel Version List Parser

    Parses the main list of kernel versions (an Apache index page) and contains
    a list of versions extracted from that page.
    """

    def __init__(self, skip_rcs):
        self.skip_rcs = skip_rcs
        super().__init__()

    def handle_starttag(self, tag, attrs):
        """Collect a KernelVersion for every anchor whose href parses."""
        if tag != "a":
            return

        url = dict(attrs).get("href")
        # Anchors without an href (or with a valueless one) yield None.
        if not url or not url.startswith("v"):
            return

        try:
            version = KernelVersion.parse_version(url)
        except ValueError:
            logging.warning("Skipping invalid version %r", url)
            return

        if self.skip_rcs and version.is_rc:
            return

        self.results.add(version)


class KernelDetailParser(HTMLParserFetcher):
    """Kernel Detail Page Parser

    Parses the detail page for a specific kernel version and returns candidate
    packages of the correct architecture that can be downloaded. There's a lot
    of duplication on this page so the results will be de-duplicated.
    """

    def __init__(self, base_url, arch):
        self.base_url = base_url
        # Escape arch and the literal dot so only "..._<arch>.deb" and
        # "..._all.deb" hrefs match.
        self.pkg_re = re.compile(rf".*_(all|{re.escape(arch)})\.deb$")
        super().__init__()

    def _is_candidate(self, url):
        """Return True for a non-lowlatency package href of our arch."""
        return bool(
            url and self.pkg_re.match(url) and "lowlatency" not in url)

    def handle_starttag(self, tag, attrs):
        """Collect a KernelPackage for every candidate anchor href."""
        if tag != "a":
            return

        url = dict(attrs).get("href")
        if self._is_candidate(url):
            self.results.add(KernelPackage(self.base_url, url))


class KernelVersionFetcher:
    """Fetch Kernel Packages

    Determines the latest version of the kernel available (per the rules in
    KernelVersion) and downloads the packages required to install that version
    into the current directory.
    """

    URL = "https://kernel.ubuntu.com/~kernel-ppa/mainline/"

    def __init__(self, arch):
        self.arch = arch

    def find_latest_version(self, versions):
        """Return the greatest KernelVersion in versions.

        Raises:
            LookupError: if versions is empty.
        """
        latest = max(versions, default=None)
        if latest is None:
            raise LookupError("No latest version found")
        return latest

    def find_packages(self):
        """Return the set of KernelPackage objects for the latest kernel."""
        all_versions = KernelListParser(True).fetch(self.URL)
        latest = self.find_latest_version(all_versions)
        page_url = latest.detail_url(self.URL)
        return KernelDetailParser(page_url, self.arch).fetch(page_url)

    def fetch_packages(self):
        """Download every package of the latest kernel, printing progress."""
        for package in self.find_packages():
            print(f"Fetching: {package.filename}")
            package.fetch()
            # TODO: Check the checksums


if __name__ == "__main__":
    KernelVersionFetcher("amd64").fetch_packages()