summaryrefslogtreecommitdiff
path: root/src/indexer.py
blob: 283cdbc18ad3821321e666f615a22ca63f2013aa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import sqlite3

from whoosh import fields, index

TRACK_QUERY = """
SELECT
    t.track_id, t.composer, t.explicit, t.disc_number, t.name as track_name,
    t.track_number, t.year as track_year, g.name as genre, a.artist,
    ab.title as album, ab.artist as album_artist, ab.compilation,
    ab.disc_count, ab.gapless, ab.release_date, ab.track_count,
    ab.year as album_year, p.bpm, p.bit_rate, p.sample_rate, p.total_time,
    k.kind
FROM
    track t
INNER JOIN
    genre g
ON  t.genre = g.id
INNER JOIN
    artist a
ON  t.artist = a.id
INNER JOIN
    album ab
ON  t.album = ab.id
INNER JOIN
    track_physical p
ON  t.track_id = p.track_id
INNER JOIN
    kind k
ON  p.kind = k.id
"""

def safe_unicode(value):
    return value
    return value.decode("utf-8") if value else None

def safe_int(value):
    return int(value) if value is not None else None

def to_boolean(value):
    return 1 if value else 0

def first_value(*items):
    for item in items:
        if item is not None:
            return item

    return None


db = sqlite3.connect('iTunesLibrary.db')
db.row_factory = sqlite3.Row

curs = db.cursor()
curs.execute(TRACK_QUERY)

schema = fields.Schema(
    track_id=fields.ID(stored=True),
    composer=fields.TEXT(stored=True),
    explicit=fields.BOOLEAN(stored=True),
    disc_number=fields.NUMERIC(stored=True),

    track_name=fields.NGRAM(stored=True),
    genre=fields.NGRAM(stored=True),
    artist=fields.NGRAM(stored=True),
    album=fields.NGRAM(stored=True),

    track_number=fields.NUMERIC(stored=True),
    year=fields.NUMERIC(stored=True),
    compilation=fields.BOOLEAN(stored=True),
    disc_count=fields.NUMERIC(stored=True),
    gapless=fields.BOOLEAN(stored=True),
    release_date=fields.DATETIME(stored=True),
    track_count=fields.NUMERIC(stored=True),
    bpm=fields.NUMERIC(stored=True),
    bit_rate=fields.NUMERIC(stored=True),
    sample_rate=fields.NUMERIC(stored=True),
    total_time=fields.NUMERIC(stored=True),
    kind=fields.TEXT(stored=True)
)

idx = index.create_in("indexdir", schema)
writer = idx.writer()

for record in curs.fetchall():
    writer.add_document(
        track_id=str(record['track_id']).decode("ascii"),

        composer=safe_unicode(record['composer']),
        genre=safe_unicode(record['genre']),
        album=safe_unicode(record['album']),
        artist=safe_unicode(first_value(record['artist'], record['album_artist'])),
        track_name=safe_unicode(record['track_name']),
        kind=safe_unicode(record['kind']),

        #release_date=record['release_date'],

        explicit=to_boolean(record['explicit']),
        compilation=to_boolean(record['compilation']),
        gapless=to_boolean(record['gapless']),

        disc_number=safe_int(record['disc_number']),
        track_number=safe_int(record['track_number']),
        year=safe_int(first_value(record['track_year'], record['album_year'])),
        disc_count=safe_int(record['disc_count']),
        track_count=safe_int(record['track_count']),
        bpm=safe_int(record['bpm']),
        bit_rate=safe_int(record['bit_rate']),
        sample_rate=safe_int(record['sample_rate']),
        total_time=safe_int(record['total_time']),
        )

writer.commit()
db.close()