src/index_files.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58

import os
import re
import json
import mutagen
from pprint import pprint


# Matches any path whose name ends in ".mp3" or ".m4a" (case-sensitive).
# Raw string so that "\." reaches the regex engine as an escaped dot instead
# of being treated as an (invalid) string escape sequence.
music = re.compile(r'.*\.(mp3|m4a)$')


# Module-level set of tag names; nothing in the visible part of the file
# adds to it yet.
tags = set()


def parse_frame(txt):
    """Split a colon-separated frame string into three parts.

    The text before the first colon is the frame type.  What follows is
    split once more on the next colon into a remainder and the content.
    When the remainder starts with ``http`` the colon that belongs to the
    URL scheme is not treated as a separator.

    Args:
        txt: String of the form ``TYPE:remainder`` or
            ``TYPE:remainder:content``.

    Returns:
        A ``(frame_type, remainder, content)`` tuple; ``content`` is
        ``None`` when there is no content part.

    Raises:
        ValueError: If ``txt`` contains no colon at all.
    """
    frame_type, remainder = txt.split(':', 1)
    content = None

    if remainder.startswith('http'):
        # "http://host:content" -> ['http', '//host', 'content'].  The first
        # two pieces are glued back together to restore the URL.
        # NOTE(review): a URL with an explicit port ("http://host:8080/x")
        # is still split at the port colon -- confirm whether such values
        # can occur.
        pieces = remainder.split(':', 2)
        # Only a third piece is real content; previously a bare URL ended up
        # with its own tail ("//host") reported as content.
        if len(pieces) == 3:
            content = pieces[2]
        remainder = ':'.join(pieces[:2])
    elif ':' in remainder:
        remainder, content = remainder.split(':', 1)

    return frame_type, remainder, content


# Walk ~/Desktop/Music and print every tag of every .mp3/.m4a file found.
# Each print call takes one pre-formatted string, so the output is the same
# whether print is the Python 2 statement or the Python 3 function.
for path, dirs, files in os.walk(os.path.expanduser('~/Desktop/Music')):
    for filename in files:
        fullname = os.path.join(path, filename)

        # Skip anything that does not look like a supported audio file.
        if not music.match(fullname):
            continue

        audio = mutagen.File(fullname)

        # Report files for which mutagen gave a falsy result or no tags,
        # then move on to the next file.
        if not audio or not audio.tags:
            print("ERROR:  %s" % fullname)
            continue

        for tag, value in audio.tags.items():
            # NOTE: the filters below are disabled experiments; every tag is
            # currently printed unconditionally.

            #if hasattr(value, 'text'):
            #    value = value.text

            #if isinstance(value, list) and len(value) == 1:
            #    value = value[0]

            #if tag == 'covr':
            #    continue

            #if hasattr(value, 'mime') and value.mime.startswith('image'):
            #    continue

            #if tag.startswith(('PRIV', 'APIC', 'COMM', 'USLT', 'WCOM')):
            #    continue

            #if not isinstance(value, unicode):
            print("%s %s %s" % (repr(tag), type(value), repr(value)))