From: Colin McCabe Date: Sun, 13 Dec 2009 23:49:51 +0000 (-0800) Subject: Add tagger.py X-Git-Url: http://club.cc.cmu.edu/~cmccabe/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d1028f9bf3b801a24c130e75de6b035cd03851d0;p=cmccabe-bin Add tagger.py --- diff --git a/id3v2_wrapper.sh b/id3v2_wrapper.sh new file mode 100755 index 0000000..ac7321c --- /dev/null +++ b/id3v2_wrapper.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +# Run id3v2 without babbling to stdout about stripped tags +id3v2 "$@" | grep -v '^Stripping id3 tag in .*\.\.\.id3v[12] stripped.' + diff --git a/tagger.py b/tagger.py new file mode 100755 index 0000000..bda52af --- /dev/null +++ b/tagger.py @@ -0,0 +1,331 @@ +#!/usr/bin/python + +# +# Changes mp3 ID3 tags to match the file names. +# +# I like to store my mp3s in a file structure like this: +# +# Artist Name - Album Title = Conductor [Encoding]/01 - Track 1.mp3 +# Artist Name - Album Title = Conductor [Encoding]/02 - Track 2.mp3 +# ... +# +# This script runs through an entire directory of mp3s, and changes all the +# ID3 tags to match the file names. +# +# Philosophical aside: I guess you could argue that this defeats the point of +# ID3 tags, since under this system, all the information is stored in the file +# name. This is true; however, I need to play my music on a lot of different +# systems (like mp3 players) which don't use my file naming scheme. +# +# I have had bad experiences with ID3 tags in the past. Every program seems +# to generate and parse them a little bit differently. The ID3 standard +# doesn't even specify whether to use unicode vs. Latin-1, let alone what you +# should do if a file has conflicting ID3v1 and ID3v2 tags. +# +# It's just easier to use a filing system that actually works well-- the Linux +# filesystem -- and regard ID3 tags as something ephemeral that's generated +# out of the "real" file information. 
#
# Colin McCabe
# 2008/12/7
#

import getopt
import os
import re
import stat
import string
import subprocess
import sys

# GLOBALS

# script arguments
dry_run = False
verbose = False
self_test = False

# globals
total_albums = 0
id3v2_wrapper = ""


# Verifies that there is an executable script named 'target' in the same
# directory as this script. If not, prints an error message and exits.
def find_companion_script(target):
    """Return the absolute path of the helper script 'target'.

    The helper is looked up in the directory containing this script
    (derived from sys.argv[0]).  If it cannot be stat'ed, or its owner
    execute bit is not set, an error is printed and the process exits
    with status 1.
    """
    # abspath() first: when the script is started as "python tagger.py",
    # dirname(sys.argv[0]) is "" and naive concatenation with "/" would
    # point at the filesystem root instead of the current directory.
    mydir = os.path.dirname(os.path.abspath(sys.argv[0]))
    target_path = os.path.join(mydir, target)
    try:
        mode = os.stat(target_path).st_mode
    except OSError as e:
        print("ERROR: can't find " + target + ": " + str(e))
        sys.exit(1)
    if not (mode & stat.S_IEXEC):
        print("ERROR: " + target + " is not executable")
        sys.exit(1)
    return target_path


# Regular expressions for parsing file names--
# which is, after all, what this program is all about
music_file_re = re.compile(r".*\.mp3$")

# <anything>/<dir_name>/<NN> - <track_name>.<extension>
music_file_name_re = re.compile(r".*/" +
                    r"(?P<dir_name>[^/]*)/" +
                    r"(?P<track_number>[0123456789][0123456789]) - " +
                    r"(?P<track_name>[^/]*)" +
                    r"\.[a-zA-Z0123456789]*$")

# <artist> - <album>[ = <conductor>][[LL]]
# The artist group is non-greedy so that the FIRST " - " separates artist
# from album; later " - " sequences stay inside the album title.  The
# conductor group, when present, still includes its leading " = ".
dir_name_re = re.compile(r"(.*/)?" +
                    r"(?P<artist>[0-9A-Za-z _.\-]*?) - " +
                    r"(?P<album>[0-9A-Za-z _(),'.\-\+]*)" +
                    r"(?P<conductor> = [0-9A-Za-z _'.\-]*)?" +
                    r"(?P<encoding>\[LL\])?$")


def self_test_music_file(m, artist, album_name,
                         conductor, track_number, title):
    """Compare the parsed fields of 'm' with the expected values.

    'm' is expected to expose album.artist, album.name, album.conductor,
    track_number and title.  Every mismatch is reported on stdout.
    Returns the number of mismatching fields (0 when everything agrees).
    """
    checks = (
        ("artist", m.album.artist, artist),
        ("album_name", m.album.name, album_name),
        ("conductor", m.album.conductor, conductor),
        ("track_number", m.track_number, track_number),
        ("title", m.title, title),
    )
    failures = 0
    for label, actual, expected in checks:
        if actual != expected:
            # str() is required: track numbers are ints and cannot be
            # concatenated to the message directly.
            print("FAILED: " + label + ": \"" + str(actual) + "\"")
            print("\tshould be: \"" + str(expected) + "\"")
            failures += 1
    return failures


def run_self_test():
    """Parse a few sample path names and report any field that is wrong.

    Returns the total number of mismatching fields.
    """
    failures = 0

    m = MusicFile.from_filename("./Mozart - " +
                    "Symphony No 26 in Eb Maj - K161a" +
                    " = The Academy of Ancient Music" +
                    "/01 - Adagio.mp3")
    failures += self_test_music_file(m,
                    artist="Mozart",
                    album_name="Symphony No 26 in Eb Maj - K161a",
                    conductor="The Academy of Ancient Music",
                    track_number=1,
                    title="Adagio")

    m = MusicFile.from_filename("./Tchaikovsky - " +
                    "The Sleeping Beauty - Op. 66" +
                    " = Sir Charles Mackerras" +
                    "/02 - Scene.mp3")
    failures += self_test_music_file(m,
                    artist="Tchaikovsky",
                    album_name="The Sleeping Beauty - Op. 66",
                    conductor="Sir Charles Mackerras",
                    track_number=2,
                    title="Scene")

    # TODO: move John Cage into Comment or secondary author field here.
    m = MusicFile.from_filename("./Various - " +
                    "American Classics" +
                    "/12 - John Cage - Prelude for Meditation.mp3")
    failures += self_test_music_file(m,
                    artist="Various",
                    album_name="American Classics",
                    conductor="",
                    track_number=12,
                    title="John Cage - Prelude for Meditation")
    return failures


# Given a hash H, creates a hash which is the inverse
# i.e.
# if H[k] = v, H'[v] = k
def reverse_hash(h):
    """Return a new dict that maps every value of 'h' back to its key.

    If several keys share one value, the key visited last wins.
    """
    return dict((v, k) for k, v in h.items())


def my_system(ignore_ret, *cmd):
    """Run the external command 'cmd' (program followed by its arguments).

    The command is echoed first in verbose mode and is not executed at all
    in dry-run mode.  Failures are reported on stdout, never raised.  When
    'ignore_ret' is true a non-zero exit status is not reported.
    """
    if verbose:
        print(cmd)
    if dry_run:
        return
    # The child gets a minimal environment.  Fall back to os.defpath so an
    # unset PATH does not put None into the environment mapping.
    my_env = {"MALLOC_CHECK_": "0",
              "PATH": os.environ.get("PATH", os.defpath)}
    try:
        retcode = subprocess.call(cmd, env=my_env, shell=False)
    except OSError as e:
        print("ERROR: Execution failed: " + str(e))
        return
    if retcode < 0:
        # subprocess reports death-by-signal as a negative return code
        print("ERROR: Child was terminated by signal " + str(-retcode))
    elif (not ignore_ret) and (retcode != 0):
        print("ERROR: Child returned " + str(retcode))


# CLASSES
class FileType(object):
    """Holds the encoding marker of a file."""

    def __init__(self, encoding):
        self.encoding = encoding


class Album(object):
    """Album information parsed from a directory name of the form
    "Artist - Album Title = Conductor [Encoding]".
    """

    def __init__(self, artist, name, conductor, encoding):
        """Store the album fields with trailing whitespace removed.

        'conductor' may still carry the " = " separator captured by the
        directory regex; everything up to and including it is dropped.
        'conductor' and 'encoding' may be None and are then stored as "".
        Raises MusicFileErr when 'artist' or 'name' is None.
        """
        if artist is None:
            raise MusicFileErr("can't have Album.artist = None")
        if name is None:
            raise MusicFileErr("can't have Album.name = None")
        self.artist = artist.rstrip()
        self.name = name.rstrip()
        if conductor:
            _head, sep, tail = conductor.partition(' = ')
            # Without a separator the whole string is the conductor.  The
            # rstrip() removes the blank that precedes a following "[LL]".
            self.conductor = (tail if sep else conductor).rstrip()
        else:
            self.conductor = ""
        self.encoding = encoding.rstrip() if encoding else ""

    @staticmethod
    def from_dirname(dirname):
        """Build an Album from a directory name.

        Raises MusicFileErr when the name does not match dir_name_re.
        """
        match = dir_name_re.match(dirname)
        if not match:
            raise MusicFileErr("can't parse directory name \"" +
                               dirname + "\"")
        return Album(match.group('artist'), match.group('album'),
                     match.group('conductor'), match.group("encoding"))

    def to_s(self):
        """Return the album in directory-name form, omitting empty parts."""
        ret = self.artist + " - " + self.name
        if self.conductor:
            ret += " = " + self.conductor
        if self.encoding:
            ret += " " + self.encoding
        return ret


class MusicFileErr(Exception):
    """Raised when a file or directory name cannot be parsed."""
    pass


class MusicFile(object):
    """One music file plus the tag values derived from its path name."""

    # Maps an ID3v2 frame name to a Python expression that yields the value
    # for that frame.  The expressions are evaluated by set_tags() with
    # 'self' in scope; they are constants of this file, never user input.
    id3v2_to_attrib = { 'TIT2' : 'self.title',
                        'TPE1' : 'self.album.artist',
                        'TALB' : 'self.album.name',
                        'TRCK' : 'str(self.track_number)',
                        'TPE3' : 'self.album.conductor',
                        #'TYER' : 'year'
                      }
    attrib_to_id3v2 = reverse_hash(id3v2_to_attrib)

    def __init__(self, filename, album, title, track_number):
        """'track_number' may be an int or a numeric string such as "01"."""
        self.filename = filename
        self.album = album
        self.title = title
        self.track_number = int(track_number)

    @staticmethod
    def from_filename(filename):
        """Build a MusicFile (and its Album) from a path name.

        Raises MusicFileErr when the file name or its directory name cannot
        be parsed.
        """
        match = music_file_name_re.match(filename)
        if not match:
            raise MusicFileErr("can't parse music file name \"" +
                               filename + "\"")
        album = Album.from_dirname(match.group('dir_name'))
        return MusicFile(filename, album,
                         match.group('track_name'),
                         match.group('track_number'))

    def to_s(self):
        """Return "<album>/<NN> - <title>" (without a file extension)."""
        return self.album.to_s() + "/" + \
            ("%02d" % self.track_number) + " - " + self.title

    def clear_tags(self):
        """Delete the ID3v1 and ID3v2 tags of the file via the wrapper."""
        my_system(True, id3v2_wrapper, "--delete-v1", self.filename)
        my_system(True, id3v2_wrapper, "--delete-v2", self.filename)

    def add_tag(self, att, expr):
        """Set ID3v2 frame 'att' of the file to the value 'expr'."""
        attribute = "--" + att
        my_system(False, "id3v2", attribute, expr, self.filename)

    def set_tags(self):
        """Write every frame listed in id3v2_to_attrib to the file."""
        for att, expr in self.id3v2_to_attrib.items():
            # 'expr' is one of the constant expressions defined above
            self.add_tag(att, eval(expr))


# CODE

## Parse options
def Usage():
    """Print the help text and exit with status 1."""
    prog = os.path.basename(sys.argv[0])
    print(prog + ": the mp3 tagging program")
    print("")
    print("Usage: " + prog + " [-h][-d][-s][-v] [dirs]")
    print("-h: this help message")
    print("-d: dry-run mode")
    print("-s: self-test")
    print("-v: verbose mode")
    print("dirs: directories to search for albums.")
    print("This program skips dirs with \"[LL]\" in the name.")
    sys.exit(1)


def _tag_tree(top):
    """Retag every .mp3 found below 'top'; return how many were processed."""
    count = 0
    if verbose:
        print("******** find -L " + top + " -noleaf")
    # universal_newlines=True makes the pipe yield text lines on both
    # Python 2 and Python 3, so iteration ends cleanly at end of output.
    proc = subprocess.Popen(['find', '-L', top, '-noleaf'],
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        for line in proc.stdout:
            file_name = line.rstrip("\n")
            if not music_file_re.match(file_name):
                continue
            try:
                m = MusicFile.from_filename(file_name)
                m.clear_tags()
                m.set_tags()
                if verbose:
                    print("SUCCESS: " + file_name)
                count += 1
            except MusicFileErr as e:
                print("ERROR: " + str(e))
    finally:
        # Reap the child so it does not linger as a zombie
        proc.stdout.close()
        proc.wait()
    if verbose:
        print("********")
    return count


def main():
    """Script entry point: parse options, then retag the given directories."""
    global dry_run, verbose, self_test, total_albums, id3v2_wrapper

    ## Find id3v2_wrapper.sh
    id3v2_wrapper = find_companion_script('id3v2_wrapper.sh')

    # NOTE: the option string is kept as it was; "-i ARG" is accepted but
    # has no effect.
    try:
        optlist, dirs = getopt.getopt(sys.argv[1:], ':dhi:sv')
    except getopt.GetoptError:
        Usage()

    for opt in optlist:
        if opt[0] == '-h':
            Usage()
        if opt[0] == '-d':
            dry_run = True
        if opt[0] == '-v':
            verbose = True
        if opt[0] == '-s':
            self_test = True

    if self_test:
        run_self_test()
        sys.exit(0)

    for top in dirs:
        if "[LL]" in top:
            print("skipping \"" + top + "\"...")
            continue
        # Assume that paths without a directory prefix are local
        if not (top.startswith("/") or top.startswith("./")):
            top = "./" + top

        # Validate that 'top' is a directory and we can access the entries
        # Note: this does not protect against having nested directories with
        # bad permissions
        try:
            os.listdir(top)
        except OSError:
            print("ERROR: cannot stat entries of \"" + top + "\"")
            continue

        # Process all files in the directory
        total_albums = total_albums + _tag_tree(top)

    prefix = "(dry run) " if dry_run else ""
    print(prefix + "Successfully processed " + str(total_albums) +
          " total mp3s")


if __name__ == "__main__":
    main()