site_builder/blog.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181

# vim: set filencoding=utf8
"""
Blog Post Builder

@author: Mike Crute (mike@crute.us)
@date: June 03, 2010
"""

import os
from functools import wraps
from datetime import datetime

# Docutils imports, crazy yo
from docutils import nodes
from docutils.core import Publisher, publish_string
from docutils.transforms import Transform
from docutils.io import NullOutput, FileInput
from docutils.parsers.rst import Parser as RSTParser
from docutils.writers.html4css1 import Writer as HTMLWriter
from docutils.readers.standalone import Reader as StandaloneReader


class BlogMetaTransform(Transform):
    """
    Removes metadata tags from the document tree.

    This transformer removes the metadata nodes from the document tree
    and places them in a blog_meta dictionary on the document object.
    This happens before rendering so the meta won't show up in the output.
    """

    default_priority = 360 # Fuck if I know, same as the PEP header transform

    def __init__(self, *args, **kwargs):
        Transform.__init__(self, *args, **kwargs)

        self.meta = self.document.blog_meta = {
            'tags': [],
            }

    def apply(self):
        docinfo = None

        # One to get the docinfo and title
        # We need a copy of the document as a list so we can modify it
        # without messing up iteration.
        for node in list(self.document):
            if isinstance(node, nodes.docinfo):
                docinfo = node
                self.document.remove(node)

            if isinstance(node, nodes.title):
                self.meta['title'] = unicode(node[0])
                self.document.remove(node)

        # And one to process the docinfo
        for node in docinfo:
            if isinstance(node, nodes.author):
                self._handle_author(node)

            if isinstance(node, nodes.date):
                self._handle_date(node)

            if isinstance(node, nodes.field):
                self._handle_field(node)

    def _handle_author(self, node):
        self.meta['author'] = Author(node[0]['name'], node[0]['refuri'])

    def _handle_date(self, node):
        raw_date = unicode(node[0])
        self.meta['post_date'] = datetime.strptime(raw_date,
                                                    '%a %b %d %H:%M:%S %Y')

    def _handle_field(self, node):
        name = node[0][0]
        value = unicode(node[1][0][0])

        if name == 'Tag':
            self.meta['tags'].append(value)


class BlogPostReader(StandaloneReader):
    """
    Post reader for blog posts.

    This exists only so that we can append our custom blog
    transformers on to the regular ones.
    """

    def get_transforms(self):
        return StandaloneReader.get_transforms(self) + [
            BlogMetaTransform,
            ]


class Author(object):
    """
    Representation of the author information for a blog post.
    """

    def __init__(self, name, email):
        self.name = name
        self.email = email

        if email.startswith('mailto:'):
            self.email = email[len('mailto:'):]

    def __str__(self):
        return '{0} <{1}>'.format(self.name, self.email)


class BlogPost(object):
    """
    Representation of a blog post.

    Constructed from a docutils dom version of the blog post.
    """

    def __init__(self, title, post_date, author, tags, contents=None):
        self.title = title
        self.post_date = post_date
        self.author = author
        self.tags = tags
        self.contents = contents
        self._filename = None

    @property
    def filename(self):
        return os.path.basename(self._filename)

    @filename.setter
    def filename(self, value):
        self._filename = value

    @property
    def pretty_date(self):
        return self.post_date.strftime("%B %d, %Y")

    @classmethod
    def from_file(cls, filename):
        """
        Loads a file from disk, parses it and constructs a new BlogPost.

        This method reflects a bit of the insanity of docutils. Basically
        this is just the docutils.core.publish_doctree function with some
        modifications to use an html writer and to load a file instead of
        a string.
        """
        pub = Publisher(destination_class=NullOutput,
                        source=FileInput(source_path=filename),
                        reader=BlogPostReader(), writer=HTMLWriter(),
                        parser=RSTParser())

        pub.get_settings() # This is not sane.
        pub.settings.traceback = True # Damnit
        pub.publish()

        meta = pub.document.blog_meta
        post = cls(meta['title'], meta['post_date'], meta['author'],
                   meta['tags'], pub.writer.parts['html_body'])

        post.filename = filename

        return post


def load_post_index(directory='.'):
    """
    Scan the current directory for rst files and build an index.
    """
    posts = []
    for filename in os.listdir(directory):
        if not filename.endswith('.rst'):
            continue

        filename = os.path.join(directory, filename)
        posts.append(BlogPost.from_file(filename))

    return posts