A static method for turning raw MARC data in transmission format into a MARC::Record object.
# File lib/marc/reader.rb, line 73
#
# Decodes one raw MARC record (transmission format) into a Record.
#
# marc   - String holding a single record: leader, directory, field data.
# params - Hash of options. The only key read here is :forgiving; when it
#          is truthy the field data is split on END_OF_FIELD and handed
#          out in order, instead of being located through the byte
#          offsets stored in the directory.
#
# Returns the populated Record.
# Raises ArgumentError when the directory cannot be extracted from the
# record, or (strict mode) when a directory entry points past the end of
# the record. ArgumentError keeps `rescue ArgumentError` callers working:
# the uncaught `throw` used previously surfaced as an ArgumentError
# (subclass) on Ruby 1.9 and later.
def self.decode(marc, params={})
  record = Record.new
  record.leader = marc[0..LEADER_LENGTH-1]

  # where the field data starts
  base_address = record.leader[12..16].to_i

  # get the byte offsets from the record directory
  directory = marc[LEADER_LENGTH..base_address-1]
  raise ArgumentError, "invalid directory in record" if directory.nil?

  # the number of fields in the record corresponds to
  # how many directory entries there are
  num_fields = directory.length / DIRECTORY_ENTRY_LENGTH

  # when operating in forgiving mode we just split on end of
  # field instead of using calculated byte offsets from the
  # directory
  forgiving = params[:forgiving]
  if forgiving
    # a base address past the end of the data yields nil; treat that as
    # "no field data" rather than crashing on nil.split
    all_fields = (marc[base_address..-1] || '').split(END_OF_FIELD)
  else
    mba = marc.bytes.to_a
  end

  num_fields.times do |field_num|
    # pull the directory entry for a field out (start + length form takes
    # exactly one entry, not one character of the next entry as well)
    entry = directory[field_num * DIRECTORY_ENTRY_LENGTH, DIRECTORY_ENTRY_LENGTH]

    # extract the tag
    tag = entry[0..2]

    if forgiving
      # use the next available chunk of field data that we split apart
      # based on END_OF_FIELD; when the directory lists more fields than
      # there are chunks, skip the entries that have no data
      field_data = all_fields.shift
      next if field_data.nil?
    else
      # use the byte offsets in the directory to figure out what field
      # data to extract
      length = entry[3..6].to_i
      offset = entry[7..11].to_i
      field_start = base_address + offset
      field_end = field_start + length - 1
      field_bytes = mba[field_start..field_end]
      if field_bytes.nil?
        raise ArgumentError, "directory entry for #{tag} points past end of record"
      end
      # byte values are 0..255, so pack them as unsigned chars
      field_data = field_bytes.pack("C*")
    end

    # remove end of field
    field_data.delete!(END_OF_FIELD)

    # add a control field or data field
    if MARC::ControlField.control_tag?(tag)
      record.append(MARC::ControlField.new(tag, field_data))
    else
      field = MARC::DataField.new(tag)

      # get all subfields
      subfields = field_data.split(SUBFIELD_INDICATOR)

      # must have at least 2 elements (indicators, and 1 subfield)
      # TODO some sort of logging?
      next if subfields.length < 2

      # get indicators
      indicators = subfields.shift
      field.indicator1 = indicators[0, 1]
      field.indicator2 = indicators[1, 1]

      # add each subfield to the field
      subfields.each do |data|
        field.append(MARC::Subfield.new(data[0, 1], data[1..-1]))
      end

      # add the field to the record
      record.append(field)
    end
  end

  record
end
The constructor, which you may pass either a path:
reader = MARC::Reader.new('marc.dat')
or, if it's more convenient, a File object:
fh = File.new('marc.dat') reader = MARC::Reader.new(fh)
or really any object that responds to read(n)
# marc is a string with a bunch of records in it reader = MARC::Reader.new(StringIO.new(marc))
If your data have non-standard control fields in them (e.g., Aleph's 'FMT') you need to add them specifically to the MARC::ControlField.control_tags Set object
MARC::ControlField.control_tags << 'FMT'
# File lib/marc/reader.rb, line 26 def initialize(file) if file.is_a?(String) @handle = File.new(file) elsif file.respond_to?("read", 5) @handle = file else throw "must pass in path or file" end end
to support iteration:
for record in reader print record end
and even searching:
record.find { |f| f['245'] =~ /Huckleberry/ }
# File lib/marc/reader.rb, line 44
#
# Reads records from @handle one at a time and yields each decoded record
# to the block. Every record starts with a 5-character length prefix that
# says how many bytes the whole record (prefix included) occupies.
#
# Raises MARC::Exception when the length prefix is not a usable number
# (zero, non-numeric, or smaller than the 5-byte prefix itself) or when
# the input ends right after a length prefix.
def each
  # while there is data left in the file
  while (rec_length_s = @handle.read(5))
    # make sure the record length looks like an integer that at least
    # covers the length prefix; anything smaller would ask the handle
    # for a negative number of bytes
    rec_length_i = rec_length_s.to_i
    if rec_length_i < 5
      raise MARC::Exception, "invalid record length: #{rec_length_s}"
    end

    # get the rest of the raw MARC21 record using the record length;
    # read returns nil when nothing is left, which means the record
    # was cut off after its length prefix
    rest = @handle.read(rec_length_i - 5)
    if rest.nil?
      raise MARC::Exception, "truncated record: expected #{rec_length_i} bytes"
    end
    raw = rec_length_s + rest

    # Ruby 1.9 will try to set the encoding to ASCII-8BIT, which we don't want.
    # Not entirely sure what happens for MARC-8 encoded records, but, technically,
    # ruby-marc doesn't support MARC-8, anyway.
    raw.force_encoding('utf-8') if raw.respond_to?(:force_encoding)

    # create a record from the data and hand it to the caller
    yield MARC::Reader.decode(raw)
  end
end