All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.undercouch.citeproc.bibtex.BibTeXConverter Maven / Gradle / Ivy

package de.undercouch.citeproc.bibtex;

import de.undercouch.citeproc.csl.CSLDate;
import de.undercouch.citeproc.csl.CSLItemData;
import de.undercouch.citeproc.csl.CSLItemDataBuilder;
import de.undercouch.citeproc.csl.CSLType;
import org.jbibtex.BibTeXDatabase;
import org.jbibtex.BibTeXEntry;
import org.jbibtex.BibTeXParser;
import org.jbibtex.BibTeXString;
import org.jbibtex.Key;
import org.jbibtex.LaTeXObject;
import org.jbibtex.LaTeXParser;
import org.jbibtex.LaTeXPrinter;
import org.jbibtex.ParseException;
import org.jbibtex.TokenMgrException;
import org.jbibtex.Value;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * 

Converts BibTeX items to CSL citation items

*

The class maps BibTeX attributes to CSL attributes. The mapping is * based on the one used in Docear as * presented * by Joeran Beel.

*

Docear is released under the GPLv2 but its code may also be reused in * projects licensed under Apache License 2.0 (see * http://www.docear.org/software/licence/, * last visited 2013-09-06). The mapping here is released under the * Apache License 2.0 by permission of Joaran Beel, Docear.

* @author Joaran Beel * @author Michel Kraemer */ public class BibTeXConverter { private static final String FIELD_ABSTRACT = "abstract"; private static final String FIELD_ACCESSED = "accessed"; private static final String FIELD_ADDRESS = "address"; private static final String FIELD_ANNOTE = "annote"; private static final String FIELD_AUTHOR = "author"; private static final String FIELD_BOOKTITLE = "booktitle"; private static final String FIELD_CHAPTER = "chapter"; private static final String FIELD_DATE = "date"; private static final String FIELD_DOI = "doi"; private static final String FIELD_EDITION = "edition"; private static final String FIELD_EDITOR = "editor"; private static final String FIELD_INSTITUTION = "institution"; private static final String FIELD_ISBN = "isbn"; private static final String FIELD_ISSN = "issn"; private static final String FIELD_ISSUE = "issue"; private static final String FIELD_JOURNAL = "journal"; private static final String FIELD_JOURNALTITLE = "journaltitle"; private static final String FIELD_KEYWORDS = "keywords"; private static final String FIELD_LANGUAGE = "language"; private static final String FIELD_LOCATION = "location"; private static final String FIELD_MONTH = "month"; private static final String FIELD_NOTE = "note"; private static final String FIELD_NUMBER = "number"; private static final String FIELD_ORGANIZATION = "organization"; private static final String FIELD_PAGES = "pages"; private static final String FIELD_PUBLISHER = "publisher"; private static final String FIELD_REVISION = "revision"; private static final String FIELD_SCHOOL = "school"; private static final String FIELD_SERIES = "series"; private static final String FIELD_STATUS = "status"; private static final String FIELD_TITLE = "title"; private static final String FIELD_TYPE = "type"; private static final String FIELD_URL = "url"; private static final String FIELD_URLDATE = "urldate"; private static final String FIELD_VOLUME = "volume"; private static final String FIELD_YEAR = "year"; private static final String TYPE_ARTICLE = "article"; private static final String TYPE_BOOK = "book"; private static final String TYPE_BOOKLET = "booklet"; private static final String TYPE_CONFERENCE = "conference"; private static final String TYPE_ELECTRONIC = "electronic"; private static final String TYPE_INBOOK = "inbook"; private static final String TYPE_INCOLLECTION = "incollection"; private static final String TYPE_INPROCEEDINGS = "inproceedings"; private static final String TYPE_MANUAL = "manual"; private static final String TYPE_MASTERSTHESIS = "mastersthesis"; private static final String TYPE_ONLINE = "online"; private static final String TYPE_PATENT = "patent"; private static final String TYPE_PERIODICAL = "periodical"; private static final String TYPE_PHDTHESIS = "phdthesis"; private static final String TYPE_PROCEEDINGS = "proceedings"; private static final String TYPE_STANDARD = "standard"; private static final String TYPE_TECHREPORT = "techreport"; private static final String TYPE_UNPUBLISHED = "unpublished"; private static final String TYPE_WWW = "www"; private final LaTeXParser latexParser; private final LaTeXPrinter latexPrinter; /** * Default constructor */ public BibTeXConverter() { try { latexParser = new LaTeXParser(); } catch (ParseException e) { // can actually never happen because the default constructor // of LaTeXParser doesn't throw throw new RuntimeException(e); } latexPrinter = new LaTeXPrinter(); } /** *

Loads a BibTeX database from a stream.

*

This method does not close the given stream. The caller is * responsible for closing it.

* @param is the input stream to read from * @return the BibTeX database * @throws ParseException if the database is invalid */ public BibTeXDatabase loadDatabase(InputStream is) throws ParseException { Reader reader = new InputStreamReader(is, StandardCharsets.UTF_8); BibTeXParser parser = new BibTeXParser() { @Override public void checkStringResolution(Key key, BibTeXString string) { // ignore } }; try { return parser.parse(reader); } catch (TokenMgrException err) { throw new ParseException("Could not parse BibTeX library: " + err.getMessage()); } } /** * Converts the given database to a map of CSL citation items * @param db the database * @return a map consisting of citation keys and citation items */ public Map toItemData(BibTeXDatabase db) { Map result = new LinkedHashMap<>(); for (Map.Entry e : db.getEntries().entrySet()) { result.put(e.getKey().getValue(), toItemData(e.getValue())); } return result; } /** * Converts a BibTeX entry to a citation item * @param e the BibTeX entry to convert * @return the citation item */ public CSLItemData toItemData(BibTeXEntry e) { // get all fields from the BibTeX entry Map entries = new HashMap<>(); for (Map.Entry field : e.getFields().entrySet()) { String us = field.getValue().toUserString().replaceAll("\\r", ""); // convert LaTeX string to normal text try { List objs = latexParser.parse(new StringReader(us)); us = latexPrinter.print(objs).replaceAll("\\n", " ").replaceAll("\\r", "").trim(); } catch (ParseException | TokenMgrException ex) { // ignore } entries.put(field.getKey().getValue().toLowerCase(), us); } // map type CSLType type = toType(e.getType()); CSLItemDataBuilder builder = new CSLItemDataBuilder() .id(e.getKey().getValue()).type(type); // map address if (entries.containsKey(FIELD_LOCATION)) { builder.eventPlace(entries.get(FIELD_LOCATION)); builder.publisherPlace(entries.get(FIELD_LOCATION)); } else { builder.eventPlace(entries.get(FIELD_ADDRESS)); builder.publisherPlace(entries.get(FIELD_ADDRESS)); } // map author if (entries.containsKey(FIELD_AUTHOR)) { builder.author(NameParser.parse(entries.get(FIELD_AUTHOR))); } // map editor if (entries.containsKey(FIELD_EDITOR)) { builder.editor(NameParser.parse(entries.get(FIELD_EDITOR))); builder.collectionEditor(NameParser.parse(entries.get(FIELD_EDITOR))); } // map date CSLDate date; if (entries.containsKey(FIELD_DATE)) { date = DateParser.toDate(entries.get(FIELD_DATE)); } else { date = DateParser.toDate(entries.get(FIELD_YEAR), entries.get(FIELD_MONTH)); } builder.issued(date); builder.eventDate(date); // 'urldate' is the access date in biblatex as defined in // https://ctan.kako-dev.de/macros/latex/contrib/biblatex/doc/biblatex.pdf if (entries.containsKey(FIELD_URLDATE)) { CSLDate urlDate = DateParser.toDate(entries.get(FIELD_URLDATE)); builder.accessed(urlDate); } // map journal/journaltitle, booktitle, series if (entries.containsKey(FIELD_JOURNAL)) { builder.containerTitle(entries.get(FIELD_JOURNAL)); } else if (entries.containsKey(FIELD_JOURNALTITLE)) { builder.containerTitle(entries.get(FIELD_JOURNALTITLE)); } else if (entries.containsKey(FIELD_BOOKTITLE)) { builder.containerTitle(entries.get(FIELD_BOOKTITLE)); } else { builder.collectionTitle(entries.get(FIELD_SERIES)); } if (entries.containsKey(FIELD_SERIES)) { if (entries.containsKey(FIELD_JOURNAL)) { builder.containerTitle(entries.get(FIELD_JOURNAL)); builder.collectionTitle(entries.get(FIELD_SERIES)); } else if (entries.containsKey(FIELD_JOURNALTITLE)) { builder.containerTitle(entries.get(FIELD_JOURNALTITLE)); builder.collectionTitle(entries.get(FIELD_SERIES)); } else if (entries.containsKey(FIELD_BOOKTITLE)) { builder.containerTitle(entries.get(FIELD_BOOKTITLE)); builder.collectionTitle(entries.get(FIELD_SERIES)); } } // map number and issue builder.number(entries.get(FIELD_NUMBER)); builder.issue(entries.get(FIELD_ISSUE)); // map publisher, institution, school, organisation if (type == CSLType.REPORT) { if (entries.containsKey(FIELD_PUBLISHER)) { builder.publisher(entries.get(FIELD_PUBLISHER)); } else if (entries.containsKey(FIELD_INSTITUTION)) { builder.publisher(entries.get(FIELD_INSTITUTION)); } else if (entries.containsKey(FIELD_SCHOOL)) { builder.publisher(entries.get(FIELD_SCHOOL)); } else { builder.publisher(entries.get(FIELD_ORGANIZATION)); } } else if (type == CSLType.THESIS) { if (entries.containsKey(FIELD_PUBLISHER)) { builder.publisher(entries.get(FIELD_PUBLISHER)); } else if (entries.containsKey(FIELD_SCHOOL)) { builder.publisher(entries.get(FIELD_SCHOOL)); } else if (entries.containsKey(FIELD_INSTITUTION)) { builder.publisher(entries.get(FIELD_INSTITUTION)); } else { builder.publisher(entries.get(FIELD_ORGANIZATION)); } } else { if (entries.containsKey(FIELD_PUBLISHER)) { builder.publisher(entries.get(FIELD_PUBLISHER)); } else if (entries.containsKey(FIELD_ORGANIZATION)) { builder.publisher(entries.get(FIELD_ORGANIZATION)); } else if (entries.containsKey(FIELD_INSTITUTION)) { builder.publisher(entries.get(FIELD_INSTITUTION)); } else { builder.publisher(entries.get(FIELD_SCHOOL)); } } // map title or chapter if (entries.containsKey(FIELD_TITLE)) { builder.title(entries.get(FIELD_TITLE)); } else { builder.title(entries.get(FIELD_CHAPTER)); } // map pages String pages = entries.get(FIELD_PAGES); if (pages != null) { PageRanges ranges = PageParser.parse(pages); builder.page(ranges.getLiteral()); builder.pageFirst(ranges.getPageFirst()); Integer numberOfPages = ranges.getNumberOfPages(); if (numberOfPages != null) { builder.numberOfPages(String.valueOf(numberOfPages)); } } // map last accessed date if (entries.containsKey(FIELD_ACCESSED)) { builder.accessed(DateParser.toDate(entries.get(FIELD_ACCESSED))); } // map genre as per https://aurimasv.github.io/z2csl/typeMap.xml#map-thesis switch (type) { case BOOK: case MANUSCRIPT: case MAP: case MOTION_PICTURE: case PERSONAL_COMMUNICATION: case POST: case POST_WEBLOG: case REPORT: case SPEECH: case THESIS: case WEBPAGE: if (entries.containsKey(FIELD_TYPE)) { builder.genre(entries.get(FIELD_TYPE)); } break; default: // ignore genre break; } // map language if (entries.containsKey(FIELD_LANGUAGE)) { builder.language(entries.get(FIELD_LANGUAGE)); } // map other attributes builder.volume(entries.get(FIELD_VOLUME)); builder.keyword(entries.get(FIELD_KEYWORDS)); builder.URL(entries.get(FIELD_URL)); builder.status(entries.get(FIELD_STATUS)); builder.ISSN(entries.get(FIELD_ISSN)); builder.ISBN(entries.get(FIELD_ISBN)); builder.version(entries.get(FIELD_REVISION)); builder.annote(entries.get(FIELD_ANNOTE)); builder.edition(entries.get(FIELD_EDITION)); builder.abstrct(entries.get(FIELD_ABSTRACT)); builder.DOI(entries.get(FIELD_DOI)); builder.note(entries.get(FIELD_NOTE)); // create citation item return builder.build(); } /** * Converts a BibTeX type to a CSL type * @param type the type to convert * @return the converted type (never null, falls back to {@link CSLType#ARTICLE}) */ public CSLType toType(Key type) { String s = type.getValue(); if (s.equalsIgnoreCase(TYPE_ARTICLE)) { return CSLType.ARTICLE_JOURNAL; } else if (s.equalsIgnoreCase(TYPE_PROCEEDINGS)) { return CSLType.BOOK; } else if (s.equalsIgnoreCase(TYPE_MANUAL)) { return CSLType.BOOK; } else if (s.equalsIgnoreCase(TYPE_BOOK)) { return CSLType.BOOK; } else if (s.equalsIgnoreCase(TYPE_PERIODICAL)) { return CSLType.BOOK; } else if (s.equalsIgnoreCase(TYPE_BOOKLET)) { return CSLType.PAMPHLET; } else if (s.equalsIgnoreCase(TYPE_INBOOK)) { return CSLType.CHAPTER; } else if (s.equalsIgnoreCase(TYPE_INCOLLECTION)) { return CSLType.CHAPTER; } else if (s.equalsIgnoreCase(TYPE_INPROCEEDINGS)) { return CSLType.PAPER_CONFERENCE; } else if (s.equalsIgnoreCase(TYPE_CONFERENCE)) { return CSLType.PAPER_CONFERENCE; } else if (s.equalsIgnoreCase(TYPE_MASTERSTHESIS)) { return CSLType.THESIS; } else if (s.equalsIgnoreCase(TYPE_PHDTHESIS)) { return CSLType.THESIS; } else if (s.equalsIgnoreCase(TYPE_TECHREPORT)) { return CSLType.REPORT; } else if (s.equalsIgnoreCase(TYPE_PATENT)) { return CSLType.PATENT; } else if (s.equalsIgnoreCase(TYPE_ELECTRONIC)) { return CSLType.WEBPAGE; } else if (s.equalsIgnoreCase(TYPE_ONLINE)) { return CSLType.WEBPAGE; } else if (s.equalsIgnoreCase(TYPE_WWW)) { return CSLType.WEBPAGE; } else if (s.equalsIgnoreCase(TYPE_STANDARD)) { return CSLType.LEGISLATION; } else if (s.equalsIgnoreCase(TYPE_UNPUBLISHED)) { return CSLType.MANUSCRIPT; } return CSLType.ARTICLE; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy