# ====== mkr2cat.py ======
# coding: utf-8
'''
Converts a bibliographic database in MarcMaker format to "id format".

The original database is read as UTF-8 and the output is written as latin1.
Before encoding, decomposed characters in data fields are composed (NFC) so
that as many accented characters as possible have a latin1 equivalent;
characters that still cannot be encoded are silently dropped.

Usage: python mkr2cat.py DBNAME   (reads DBNAME.mkr, writes DBNAME.id)
'''

import io
import sys
import unicodedata

encoding_in = 'utf-8'
encoding_out = 'latin1'

# Written once at the top of the output and again (preceded by a newline)
# for every input line that is not a field line.
RECORD_HEADER = u'!ID 0\n'


def convert_line(line):
    '''Translate one MarcMaker line into its "id format" equivalent.

    line -- one already-decoded (unicode) line of the .mkr file, including
            its trailing newline.

    Returns the unicode text to write to the output, or None when the line
    must be skipped.  The trailing newline of the input line is carried over
    into the result because the field data is sliced up to the end of line.
    '''
    if line[:1] != u'=':
        # Any line that is not a field (e.g. the blank line between two
        # records) starts a new record in the output.
        return u'\n' + RECORD_HEADER

    tag = line[1:4]

    # Plain string comparison: drops 901-999 and also non-numeric tags such
    # as 'LDR' (letters sort after digits).  Tag 900 itself is kept.
    if tag > u'900':
        return None

    # Tags below 010 carry no indicators or subfields: copy the data as is.
    if tag < u'010':
        return u'!v%s!%s' % (tag, line[6:])

    # Data field: two indicator characters followed by the subfields.
    indicators = line[6:8].replace(u'\\', u'#')
    # '$' becomes the '^' subfield delimiter first; only afterwards is the
    # '{dollar}' escape turned into a literal '$', so the literal is not
    # mistaken for a delimiter.
    subfields = line[8:].replace(u'$', u'^').replace(u'{dollar}', u'$')
    subfields = unicodedata.normalize('NFC', subfields)
    return u'!v%s!%s%s' % (tag, indicators, subfields)


def main(argv=None):
    '''Convert DBNAME.mkr into DBNAME.id, DBNAME being the first argument.

    argv -- argument list in sys.argv layout; defaults to sys.argv.

    Returns the process exit status: 0 on success, 2 when the database name
    is missing.  I/O and decoding errors propagate as exceptions.
    '''
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write('usage: mkr2cat.py DBNAME\n')
        return 2
    dbname = argv[1]

    # io.open decodes/encodes at the file boundary on both Python 2 and 3.
    # errors='ignore' on the output drops characters that have no latin1
    # representation.  The input is opened first so that a missing .mkr file
    # does not leave an empty .id file behind.
    with io.open('%s.mkr' % dbname, encoding=encoding_in) as in_file:
        with io.open('%s.id' % dbname, 'w', encoding=encoding_out,
                     errors='ignore') as out_file:
            out_file.write(RECORD_HEADER)
            for line in in_file:
                out = convert_line(line)
                if out is not None:
                    out_file.write(out)
    return 0


if __name__ == '__main__':
    sys.exit(main())