diff options
Diffstat (limited to 'src/indexer.py')
-rw-r--r-- | src/indexer.py | 113 |
1 files changed, 113 insertions, 0 deletions
diff --git a/src/indexer.py b/src/indexer.py new file mode 100644 index 0000000..283cdbc --- /dev/null +++ b/src/indexer.py | |||
@@ -0,0 +1,113 @@ | |||
1 | import sqlite3 | ||
2 | |||
3 | from whoosh import fields, index | ||
4 | |||
# SQL used to read track metadata for indexing.  It joins ``track`` with
# ``genre``, ``artist``, ``album``, ``track_physical`` and ``kind``.  Every
# join is an INNER JOIN, so a track without a matching row in any of those
# tables is not returned.  Columns whose names would clash are aliased
# (track_name, track_year, album, album_artist, album_year).
TRACK_QUERY = """
SELECT
t.track_id, t.composer, t.explicit, t.disc_number, t.name as track_name,
t.track_number, t.year as track_year, g.name as genre, a.artist,
ab.title as album, ab.artist as album_artist, ab.compilation,
ab.disc_count, ab.gapless, ab.release_date, ab.track_count,
ab.year as album_year, p.bpm, p.bit_rate, p.sample_rate, p.total_time,
k.kind
FROM
track t
INNER JOIN
genre g
ON t.genre = g.id
INNER JOIN
artist a
ON t.artist = a.id
INNER JOIN
album ab
ON t.album = ab.id
INNER JOIN
track_physical p
ON t.track_id = p.track_id
INNER JOIN
kind k
ON p.kind = k.id
"""
31 | |||
def safe_unicode(value):
    """Return ``value`` as text, decoding byte strings as UTF-8.

    Args:
        value: A text string, a byte string, or ``None``.

    Returns:
        The decoded text when ``value`` is a byte string; otherwise
        ``value`` itself, unchanged (this includes ``None`` and values that
        are already text).

    Raises:
        UnicodeDecodeError: If a byte string is not valid UTF-8.
    """
    # Only byte strings need decoding; calling .decode() on text that is
    # already unicode would fail on Python 3.
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return value
35 | |||
def safe_int(value):
    """Convert ``value`` with ``int()``, letting ``None`` pass through.

    Returns ``None`` when ``value`` is ``None``; otherwise ``int(value)``.
    """
    if value is None:
        return None
    return int(value)
38 | |||
def to_boolean(value):
    """Map the truthiness of ``value`` to the integer 1 (truthy) or 0 (falsy)."""
    if value:
        return 1
    return 0
41 | |||
def first_value(*items):
    """Return the first positional argument that is not ``None``.

    Falsy values such as ``0`` or ``""`` count as real values.  ``None`` is
    returned when no arguments are given or all of them are ``None``.
    """
    return next(
        (candidate for candidate in items if candidate is not None),
        None,
    )
48 | |||
49 | |||
# Open the library database.  sqlite3.Row lets the rows produced below be
# indexed by column name (record['track_id']) as well as by position.
db = sqlite3.connect('iTunesLibrary.db')
db.row_factory = sqlite3.Row

# Connection.execute() creates a cursor, runs the statement on it and
# returns that cursor.
curs = db.execute(TRACK_QUERY)
55 | |||
# Index schema: every field is declared with stored=True.  Field names are
# grouped by the Whoosh field type they use, and each name gets its own
# field instance.
schema = fields.Schema(**{
    field_name: field_type(stored=True)
    for field_type, field_names in (
        (fields.ID, ("track_id",)),
        (fields.TEXT, ("composer", "kind")),
        (fields.NGRAM, ("track_name", "genre", "artist", "album")),
        (fields.BOOLEAN, ("explicit", "compilation", "gapless")),
        (fields.DATETIME, ("release_date",)),
        (fields.NUMERIC, (
            "disc_number",
            "track_number",
            "year",
            "disc_count",
            "track_count",
            "bpm",
            "bit_rate",
            "sample_rate",
            "total_time",
        )),
    )
    for field_name in field_names
})
80 | |||
import os

# index.create_in() writes into an existing directory, so make sure the
# target is there before creating the index.
if not os.path.exists("indexdir"):
    os.mkdir("indexdir")

idx = index.create_in("indexdir", schema)
writer = idx.writer()

try:
    # Iterate the cursor directly so rows are streamed from SQLite instead
    # of being collected into one list by fetchall().
    for record in curs:
        writer.add_document(
            # Formatting into a unicode template yields text on both
            # Python 2 and Python 3; str(...).decode("ascii") only works on
            # Python 2 because Python 3 str has no decode().
            track_id=u"{0}".format(record['track_id']),

            composer=safe_unicode(record['composer']),
            genre=safe_unicode(record['genre']),
            album=safe_unicode(record['album']),
            # Use the track's artist when present, else the album's artist.
            artist=safe_unicode(
                first_value(record['artist'], record['album_artist'])
            ),
            track_name=safe_unicode(record['track_name']),
            kind=safe_unicode(record['kind']),

            # NOTE: release_date is declared in the schema but is not
            # populated; the original assignment was disabled:
            #release_date=record['release_date'],

            explicit=to_boolean(record['explicit']),
            compilation=to_boolean(record['compilation']),
            gapless=to_boolean(record['gapless']),

            disc_number=safe_int(record['disc_number']),
            track_number=safe_int(record['track_number']),
            # Use the track's year when present, else the album's year.
            year=safe_int(
                first_value(record['track_year'], record['album_year'])
            ),
            disc_count=safe_int(record['disc_count']),
            track_count=safe_int(record['track_count']),
            bpm=safe_int(record['bpm']),
            bit_rate=safe_int(record['bit_rate']),
            sample_rate=safe_int(record['sample_rate']),
            total_time=safe_int(record['total_time']),
        )
except Exception:
    # Abandon the partially written batch rather than committing it.
    writer.cancel()
    raise
else:
    writer.commit()
finally:
    # Close the database whether or not indexing succeeded.
    db.close()