All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.metafacture.biblio.pica.PicaEncoder Maven / Gradle / Ivy

There is a newer version: 6.1.2
Show newest version
/*
 * Copyright 2013, 2014 Deutsche Nationalbibliothek
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.metafacture.biblio.pica;

import java.text.Normalizer;
import java.text.Normalizer.Form;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.FormatException;
import org.metafacture.framework.MissingIdException;
import org.metafacture.framework.ObjectReceiver;
import org.metafacture.framework.StreamReceiver;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultStreamPipe;


/**
 * Encodes an event stream in pica+ format.
 *
 * Special handling of subfield 'S': the code of
 * "control subfields" (subfield name='S') will be appended to the fieldName.
 * E.g.: 041A $Saxx would be mapped to the fieldName 041Aa, and xx will be
 * ignored. A recovery of such field to original is not implemented. So the
 * encoder cannot identify an S-field.
 * The S-field special processing can be turned on if the decoder is called
 * with the option: (appendcontrolsubfield="true")
 * The default value of this option is set to "false".
 *
 * @see PicaDecoder
 *
 * @author Yining Li
 *
 */
@Description("Encodes a stream in pica+ format")
@In(StreamReceiver.class)
@Out(String.class)
@FluxCommand("encode-pica")
public final class PicaEncoder extends DefaultStreamPipe> {

    private static final String FIELD_DELIMITER = "\u001e";
    private static final String SUB_DELIMITER = "\u001f";
    private static final String FIELD_IDN_INTERN = "003@";
    private static final String FIELD_NAME_PATTERN_STRING = "\\d{3}.(/..)?";
    private static final Pattern FIELD_NAME_PATTERN = Pattern.compile(FIELD_NAME_PATTERN_STRING);

    private static StringBuilder builder = new StringBuilder(); //Result of the encoding process

    private boolean entityOpen;         //Flag to inform whether an entity is opened.
    private boolean idnControlSubField; //Flag to inform whether it is the 003@ field.
    private boolean ignoreRecordId;     //Flag to decide whether the record Id is checked.

    private String id;

    @Override
    public void startRecord(final String recordId) {
        // the name is a idn, which should be found in the encoded data under 003@.
        //any rest of the previous record is cleared before the new begins.
        builder.setLength(0);
        this.id = recordId;
        //Now an entity can be opened. But no literal is allowed.
        this.entityOpen = false;
    }

    public void setIgnoreRecordId(final boolean ignoreRecordId) {
        this.ignoreRecordId = ignoreRecordId;
    }

    public boolean getIgnoreRecordId() {
        return this.ignoreRecordId;
    }

    @Override
    public void startEntity(final String name) {
        // Here begins a field (i.e. "028A ", which is given in the name.
        // It is unknown, whether there are any subfields in the field.
        final Matcher fieldNameMatcher = FIELD_NAME_PATTERN.matcher(name);
        if (!fieldNameMatcher.matches()) {
            throw new FormatException(name);
        }
        if (entityOpen) { //No nested entities are allowed in pica+.
            throw new FormatException(name);
        }
        builder.append(name.trim()+ " ");

        idnControlSubField = !ignoreRecordId && FIELD_IDN_INTERN.equals(name.trim());
        //Now literals can be opened but no more entities.
        this.entityOpen = true;
    }

    @Override
    public void literal(final String name, final String value) {
        //A Subfield has one character or digit exactly.
        if (name.length() != 1) {
            throw new FormatException(name);
        }
        if (!entityOpen) {
            throw new FormatException(name); //new exceptions definition for literal out of entity
        }
        final String valueNew = Normalizer.normalize(value, Form.NFD);
        if (idnControlSubField) {
            // it is a 003@ field, the same record id delivered with record should follow
            if (!this.id.equals(value)) {
                throw new MissingIdException(value);
            }
            idnControlSubField = false; //only one record Id will be checked.
        }
        builder.append(SUB_DELIMITER);
        builder.append(name);
        builder.append(valueNew);
}

    @Override
    public void endEntity() {
        builder.append(FIELD_DELIMITER);
        //Now an entity can be opened. But no literal is allowed.
        this.entityOpen = false;
    }

    @Override
    public void endRecord() {
        getReceiver().process(builder.toString());
        //No literal is allowed.
        this.entityOpen = false;
    }

    @Override
    protected void onResetStream() {
        builder.setLength(0);
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy