User Tools

Site Tools


mkr2cat.py

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

mkr2cat.py [12/04/2012 00:00]
mkr2cat.py [12/04/2012 00:00] (current)
Line 1: Line 1:
 +====== mkr2cat.py ======
  
 +
 +
 +<code python>
# coding: utf-8

"""
Convert a bibliographic database in MarcMaker format to "id format".

Usage: mkr2cat.py DBNAME

Reads ``DBNAME.mkr`` (decoded as utf-8) and writes ``DBNAME.id`` (encoded
as latin1). Decomposed characters in data fields are composed (NFC) before
encoding; characters that cannot be represented in latin1 are dropped.
"""

import io
import sys
import unicodedata

encoding_in = 'utf-8'
encoding_out = 'latin1'

# Line that opens every record in the output file.
RECORD_START = u'!ID 0\n'


def convert_line(line):
    """Translate one line of MarcMaker input into its "id format" text.

    ``line`` is a decoded text line, including its trailing newline.

    Returns the text to write to the output, or ``None`` when the line
    produces no output:

    * a line that does not start with ``=`` yields a blank line followed
      by a new record header;
    * a field whose 3-character tag compares greater than ``'900'`` is
      skipped (``None``);
    * a field whose tag compares less than ``'010'`` is emitted as
      ``!vTAG!`` followed by everything from column 6 on, untouched;
    * any other field is emitted as ``!vTAG!`` + indicators + subfields,
      with ``\\`` in the indicators replaced by ``#``, the ``$`` subfield
      delimiter replaced by ``^``, ``{dollar}`` replaced by a literal
      ``$``, and the subfields normalized to NFC.
    """
    if not line.startswith(u'='):
        return u'\n' + RECORD_START

    tag = line[1:4]
    # Tags are compared as strings, exactly as in the field label.
    if tag > u'900':
        return None
    if tag < u'010':
        return u'!v%s!%s' % (tag, line[6:])

    indicators = line[6:8].replace(u'\\', u'#')
    # Order matters: turn real delimiters into '^' first, so that the
    # '$' produced from '{dollar}' is not mistaken for a delimiter.
    subfields = line[8:].replace(u'$', u'^').replace(u'{dollar}', u'$')
    subfields = unicodedata.normalize('NFC', subfields)
    return u'!v%s!%s%s' % (tag, indicators, subfields)


def convert(dbname):
    """Convert ``<dbname>.mkr`` into ``<dbname>.id``.

    Both files are opened through ``io.open`` with explicit encodings so
    that the same code runs on Python 2 and Python 3, and both are closed
    even if the conversion fails part-way.
    """
    in_path = '%s.mkr' % dbname
    out_path = '%s.id' % dbname
    with io.open(in_path, encoding=encoding_in) as in_file:
        # errors='ignore' drops characters latin1 cannot represent.
        with io.open(out_path, 'w', encoding=encoding_out,
                     errors='ignore') as out_file:
            out_file.write(RECORD_START)
            for line in in_file:
                out = convert_line(line)
                if out is not None:
                    out_file.write(out)


def main(argv=None):
    """Command-line entry point; returns the process exit status."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write('usage: %s DBNAME\n' % argv[0])
        return 2
    convert(argv[1])
    return 0


if __name__ == '__main__':
    sys.exit(main())
 +</​code>​
mkr2cat.py.txt · Last modified: 12/04/2012 00:00 (external edit)