All Downloads are FREE. Search and download functionality uses the official Maven repository.

de.undercouch.citeproc.ris.RISParser Maven / Gradle / Ivy

package de.undercouch.citeproc.ris;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang3.StringUtils;

/**
 * Parses RIS library files.
 *
 * <p>The input is read line by line. Every non-empty line must have the
 * form {@code "XX  - value"}: a two-character tag, a {@code '-'} at index 4
 * and the value starting at index 6. The line {@code "ER  -"} terminates the
 * current reference. Tags are matched case-insensitively and unknown tags
 * are ignored.</p>
 *
 * @author Michel Kraemer
 */
public class RISParser {

    /**
     * Parses RIS library files.
     *
     * <p>The given reader is wrapped in a {@link BufferedReader} unless it
     * already is one. The reader is not closed by this method. A trailing
     * reference that is not terminated by an {@code "ER  -"} line is still
     * added to the result.</p>
     *
     * @param r
     *            the reader that provides the input to parse
     * @return the parsed RIS library
     * @throws IOException
     *             if the input could not be read, if a line is too short,
     *             if tag and value are not separated by {@code '-'}, or if
     *             a {@code TY} tag carries an unknown type
     */
    @SuppressWarnings("resource")
    public RISLibrary parse(Reader r) throws IOException {
        BufferedReader br;
        if (r instanceof BufferedReader) {
            br = (BufferedReader) r;
        } else {
            br = new BufferedReader(r);
        }

        RISLibrary result = new RISLibrary();
        RISReferenceBuilder builder = null;

        // Tags that may occur several times per reference are collected in
        // lists and handed to the builder when the reference is complete.
        List<String> editors = new ArrayList<>();
        List<String> tertiaryAuthors = new ArrayList<>();
        List<String> subsidiaryAuthors = new ArrayList<>();
        List<String> authors = new ArrayList<>();
        List<String> keywords = new ArrayList<>();
        List<String> fileAttachments = new ArrayList<>();
        List<String> notes = new ArrayList<>();
        List<String> translatedAuthors = new ArrayList<>();

        int lc = 0; // 1-based number of the current line, used in error messages
        String line;
        while ((line = br.readLine()) != null) {
            ++lc;
            line = line.trim();

            if (line.equals("ER  -")) {
                // end of reference
                handleReference(builder, editors, tertiaryAuthors, subsidiaryAuthors, authors, keywords,
                        fileAttachments, notes, translatedAuthors, result);
                editors.clear();
                tertiaryAuthors.clear();
                subsidiaryAuthors.clear();
                authors.clear();
                keywords.clear();
                fileAttachments.clear();
                notes.clear();
                translatedAuthors.clear();
                builder = null;
                continue;
            }

            if (line.isEmpty()) {
                // allow for empty lines. this does not comply with the
                // standard but is exported by Zotero
                continue;
            }

            // NOTE(review): the line has already been trimmed, so a tag with
            // an empty value (e.g. "AB  - ") is shorter than 7 characters and
            // is rejected here - confirm that this is intended.
            if (line.length() < 7) {
                throw new IOException("Line " + lc + " is too short");
            }

            if (line.charAt(4) != '-') {
                throw new IOException("Tag and value must be separated by '-' character in line " + lc);
            }

            String key = line.substring(0, 2).trim();
            String value = line.substring(6).trim();

            // the first tag after the start of input or after "ER  -" opens
            // a new reference
            if (builder == null) {
                builder = new RISReferenceBuilder();
            }

            if (key.equalsIgnoreCase("TY")) {
                builder.type(parseType(value, lc));
            } else if (key.equalsIgnoreCase("A2")) {
                editors.add(value);
            } else if (key.equalsIgnoreCase("A3")) {
                tertiaryAuthors.add(value);
            } else if (key.equalsIgnoreCase("A4")) {
                subsidiaryAuthors.add(value);
            } else if (key.equalsIgnoreCase("AB")) {
                builder.abstrct(value);
            } else if (key.equalsIgnoreCase("AD")) {
                builder.authorAddress(value);
            } else if (key.equalsIgnoreCase("AN")) {
                builder.accessionNumber(value);
            } else if (key.equalsIgnoreCase("AU")) {
                authors.add(value);
            } else if (key.equalsIgnoreCase("BT")) {
                builder.bookOrConference(value);
            } else if (key.equalsIgnoreCase("C1")) {
                builder.custom1(value);
            } else if (key.equalsIgnoreCase("C2")) {
                builder.custom2(value);
            } else if (key.equalsIgnoreCase("C3")) {
                builder.custom3(value);
            } else if (key.equalsIgnoreCase("C4")) {
                builder.custom4(value);
            } else if (key.equalsIgnoreCase("C5")) {
                builder.custom5(value);
            } else if (key.equalsIgnoreCase("C6")) {
                builder.custom6(value);
            } else if (key.equalsIgnoreCase("C7")) {
                builder.custom7(value);
            } else if (key.equalsIgnoreCase("C8")) {
                builder.custom8(value);
            } else if (key.equalsIgnoreCase("CA")) {
                builder.caption(value);
            } else if (key.equalsIgnoreCase("CN")) {
                builder.callNumber(value);
            } else if (key.equalsIgnoreCase("CY")) {
                builder.place(value);
            } else if (key.equalsIgnoreCase("DA")) {
                builder.date(value);
            } else if (key.equalsIgnoreCase("DB")) {
                builder.nameOfDatabase(value);
            } else if (key.equalsIgnoreCase("DO")) {
                builder.DOI(value);
            } else if (key.equalsIgnoreCase("DP")) {
                builder.databaseProvider(value);
            } else if (key.equalsIgnoreCase("ED")) {
                // ED is collected into the same list as A2
                editors.add(value);
            } else if (key.equalsIgnoreCase("EP")) {
                builder.endPage(value);
            } else if (key.equalsIgnoreCase("ET")) {
                builder.edition(value);
            } else if (key.equalsIgnoreCase("ID")) {
                builder.id(value);
            } else if (key.equalsIgnoreCase("IS")) {
                builder.issue(value);
            } else if (key.equalsIgnoreCase("JO")) {
                builder.journal(value);
            } else if (key.equalsIgnoreCase("J2")) {
                // J2 sets the same builder property as JO
                builder.journal(value);
            } else if (key.equalsIgnoreCase("KW")) {
                keywords.add(value);
            } else if (key.equalsIgnoreCase("L1")) {
                fileAttachments.add(value);
            } else if (key.equalsIgnoreCase("L4")) {
                builder.figure(value);
            } else if (key.equalsIgnoreCase("LA")) {
                builder.language(value);
            } else if (key.equalsIgnoreCase("LB")) {
                builder.label(value);
            } else if (key.equalsIgnoreCase("M1")) {
                builder.number(value);
            } else if (key.equalsIgnoreCase("M3")) {
                builder.typeOfWork(value);
            } else if (key.equalsIgnoreCase("N1")) {
                notes.add(value);
            } else if (key.equalsIgnoreCase("N2")) {
                // N2 sets the same builder property as AB
                builder.abstrct(value);
            } else if (key.equalsIgnoreCase("NV")) {
                builder.numberOfVolumes(value);
            } else if (key.equalsIgnoreCase("OP")) {
                builder.originalPublication(value);
            } else if (key.equalsIgnoreCase("PB")) {
                builder.publisher(value);
            } else if (key.equalsIgnoreCase("PY")) {
                builder.year(value);
            } else if (key.equalsIgnoreCase("RI")) {
                builder.reviewedItem(value);
            } else if (key.equalsIgnoreCase("RN")) {
                builder.researchNotes(value);
            } else if (key.equalsIgnoreCase("RP")) {
                builder.reprintEdition(value);
            } else if (key.equalsIgnoreCase("SE")) {
                builder.section(value);
            } else if (key.equalsIgnoreCase("SN")) {
                builder.isbnOrIssn(value);
            } else if (key.equalsIgnoreCase("SP")) {
                builder.startPage(value);
            } else if (key.equalsIgnoreCase("ST")) {
                builder.shortTitle(value);
            } else if (key.equalsIgnoreCase("T1")) {
                builder.primaryTitle(value);
            } else if (key.equalsIgnoreCase("T2")) {
                builder.secondaryTitle(value);
            } else if (key.equalsIgnoreCase("T3")) {
                builder.tertiaryTitle(value);
            } else if (key.equalsIgnoreCase("TA")) {
                translatedAuthors.add(value);
            } else if (key.equalsIgnoreCase("TI")) {
                builder.title(value);
            } else if (key.equalsIgnoreCase("TT")) {
                builder.translatedTitle(value);
            } else if (key.equalsIgnoreCase("U1")) {
                // U1 sets the same builder property as M3
                builder.typeOfWork(value);
            } else if (key.equalsIgnoreCase("UR")) {
                builder.URL(value);
            } else if (key.equalsIgnoreCase("VL")) {
                builder.volume(value);
            } else if (key.equalsIgnoreCase("Y2")) {
                builder.accessDate(value);
            }
            // any other tag is unknown and deliberately ignored
        }

        // flush a trailing reference that was not terminated by "ER  -"
        handleReference(builder, editors, tertiaryAuthors, subsidiaryAuthors, authors, keywords, fileAttachments, notes,
                translatedAuthors, result);

        return result;
    }

    /**
     * Completes the current reference and adds it to the library. Every
     * non-empty list is passed to the builder as a string array; empty lists
     * are skipped. Does nothing if no reference has been started.
     *
     * @param builder
     *            the builder of the current reference, or {@code null} if no
     *            tag has been read since the last reference ended
     * @param editors
     *            values collected from the A2 and ED tags
     * @param tertiaryAuthors
     *            values collected from the A3 tag
     * @param subsidiaryAuthors
     *            values collected from the A4 tag
     * @param authors
     *            values collected from the AU tag
     * @param keywords
     *            values collected from the KW tag
     * @param fileAttachments
     *            values collected from the L1 tag
     * @param notes
     *            values collected from the N1 tag
     * @param translatedAuthors
     *            values collected from the TA tag
     * @param result
     *            the library the built reference is added to
     */
    private void handleReference(RISReferenceBuilder builder, List<String> editors, List<String> tertiaryAuthors,
            List<String> subsidiaryAuthors, List<String> authors, List<String> keywords,
            List<String> fileAttachments, List<String> notes, List<String> translatedAuthors,
            RISLibrary result) {
        if (builder == null) {
            return;
        }

        if (!editors.isEmpty()) {
            builder.editors(editors.toArray(new String[0]));
        }

        if (!tertiaryAuthors.isEmpty()) {
            builder.tertiaryAuthors(tertiaryAuthors.toArray(new String[0]));
        }

        if (!subsidiaryAuthors.isEmpty()) {
            builder.subsidiaryAuthors(subsidiaryAuthors.toArray(new String[0]));
        }

        if (!authors.isEmpty()) {
            builder.authors(authors.toArray(new String[0]));
        }

        if (!keywords.isEmpty()) {
            builder.keywords(keywords.toArray(new String[0]));
        }

        if (!fileAttachments.isEmpty()) {
            builder.fileAttachments(fileAttachments.toArray(new String[0]));
        }

        if (!notes.isEmpty()) {
            builder.notes(notes.toArray(new String[0]));
        }

        if (!translatedAuthors.isEmpty()) {
            builder.translatedAuthors(translatedAuthors.toArray(new String[0]));
        }

        result.addReference(builder.build());
    }

    /**
     * Converts the value of a {@code TY} tag to a {@link RISType}.
     *
     * @param value
     *            the tag value
     * @param lc
     *            the number of the line the value was read from, used in the
     *            error message
     * @return the parsed type
     * @throws IOException
     *             if {@link RISType#fromString(String)} rejects the value;
     *             the original exception is attached as the cause
     */
    private RISType parseType(String value, int lc) throws IOException {
        try {
            return RISType.fromString(value);
        } catch (IllegalArgumentException e) {
            // keep the original exception so the offending value stays diagnosable
            throw new IOException("Unknown type in line " + lc, e);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy