"""Walk ~/Desktop/Music and print every tag mutagen finds in .mp3/.m4a files."""
from __future__ import print_function

import json
import os
import re
from pprint import pprint

import mutagen

# Matches any path that ends in ".mp3" or ".m4a" (case-sensitive).
music = re.compile(r'.*\.(mp3|m4a)$')
tags = set()


def parse_frame(txt):
    """Split a colon-delimited frame string into three fields.

    The text before the first ':' is the frame type.  What follows is
    split once more on ':' into a remainder and an optional content
    field.  When the text after the frame type starts with 'http', the
    first ':' inside it is kept as part of the URL (the scheme
    separator) rather than being treated as a field delimiter.

    NOTE(review): presumably ``txt`` is an ID3 frame key such as
    'COMM:desc:lang' -- confirm against the intended caller.

    Args:
        txt: String containing at least one ':'.

    Returns:
        A ``(frame_type, remainder, content)`` tuple.  ``content`` is
        ``None`` when there is no field after the remainder.

    Raises:
        ValueError: If ``txt`` contains no ':' at all.
    """
    frame_type, remainder = txt.split(':', 1)
    content = None
    if remainder.startswith('http'):
        # 'http://host:content' -> ['http', '//host', 'content'].
        parts = remainder.split(':', 2)
        remainder = ':'.join(parts[:2])
        # Only report content when a third field really exists; taking
        # parts[-1] unconditionally returned the URL tail as content.
        if len(parts) == 3:
            content = parts[2]
    elif ':' in remainder:
        remainder, content = remainder.split(':', 1)
    return frame_type, remainder, content


for path, dirs, files in os.walk(os.path.expanduser('~/Desktop/Music')):
    for filename in files:
        fullname = os.path.join(path, filename)
        if not music.match(fullname):
            continue
        audio = mutagen.File(fullname)
        # mutagen.File gives a falsy result for files it cannot identify;
        # files without any tags are reported the same way.
        if not audio or not audio.tags:
            print("ERROR: ", fullname)
            continue
        for tag, value in audio.tags.items():
            # Disabled filters, kept for reference:
            #if hasattr(value, 'text'):
            #    value = value.text
            #if isinstance(value, list) and len(value) == 1:
            #    value = value[0]
            #if tag == 'covr':
            #    continue
            #if hasattr(value, 'mime') and value.mime.startswith('image'):
            #    continue
            #if tag.startswith(('PRIV', 'APIC', 'COMM', 'USLT', 'WCOM')):
            #    continue
            #if not isinstance(value, unicode):
            print(repr(tag), type(value), repr(value))