de.undercouch.citeproc.endnote.EndNoteParser Maven / Gradle / Ivy
package de.undercouch.citeproc.endnote;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
/**
* Parses EndNote library files
*
* @author Michel Kraemer
*/
public class EndNoteParser {
/**
* Parses EndNote library files
*
* @param r
* the reader that provides the input to parse
* @return the parsed EndNote library
* @throws IOException
* if the input could not be read
*/
@SuppressWarnings("resource")
public EndNoteLibrary parse(Reader r) throws IOException {
BufferedReader br;
if (r instanceof BufferedReader) {
br = (BufferedReader) r;
} else {
br = new BufferedReader(r);
}
EndNoteLibrary result = new EndNoteLibrary();
EndNoteReferenceBuilder builder = null;
List authors = new ArrayList<>();
List editors = new ArrayList<>();
List translatedAuthors = new ArrayList<>();
List keywords = new ArrayList<>();
List notes = new ArrayList<>();
List tertiaryAuthors = new ArrayList<>();
List subsidiaryAuthors = new ArrayList<>();
int lc = 0;
String line;
while ((line = br.readLine()) != null) {
++lc;
line = line.trim();
if (line.isEmpty()) {
// end of reference
handleReference(builder, authors, editors, translatedAuthors, keywords, notes, tertiaryAuthors,
subsidiaryAuthors, result);
authors.clear();
editors.clear();
translatedAuthors.clear();
keywords.clear();
notes.clear();
tertiaryAuthors.clear();
subsidiaryAuthors.clear();
builder = null;
continue;
}
if (line.length() < 4) {
throw new IOException("Line " + lc + " is too short");
}
if (line.charAt(0) != '%') {
throw new IOException("Illegal first character in line " + lc);
}
if (!Character.isWhitespace(line.charAt(2))) {
throw new IOException("Tag and value must be separated by " + "whitespace character in line " + lc);
}
String key = line.substring(1, 2).trim();
String value = line.substring(3).trim();
if (builder == null) {
builder = new EndNoteReferenceBuilder();
}
switch (line.charAt(1)) {
case '0' :
builder.type(parseType(value, lc));
break;
case '1' :
builder.custom1(value);
break;
case '2' :
builder.custom2(value);
break;
case '3' :
builder.custom3(value);
break;
case '4' :
builder.custom4(value);
break;
case '6' :
builder.numberOfVolumes(value);
break;
case '7' :
builder.edition(value);
break;
case '8' :
builder.date(value);
break;
case '9' :
builder.typeOfWork(value);
break;
case 'A' :
authors.add(value);
break;
case 'B' :
builder.bookOrConference(value);
break;
case 'C' :
builder.place(value);
break;
case 'D' :
builder.year(value);
break;
case 'E' :
editors.add(value);
break;
case 'F' :
builder.label(value);
break;
case 'G' :
builder.language(value);
break;
case 'H' :
translatedAuthors.add(value);
break;
case 'I' :
builder.publisher(value);
break;
case 'J' :
builder.journal(value);
break;
case 'K' :
keywords.add(value);
break;
case 'L' :
builder.callNumber(value);
break;
case 'M' :
builder.accessionNumber(value);
break;
case 'N' :
builder.numberOrIssue(value);
break;
case 'O' :
notes.add(value);
break;
case 'P' :
builder.pages(value);
break;
case 'Q' :
builder.translatedTitle(value);
break;
case 'R' :
builder.electronicResourceNumber(value);
break;
case 'S' :
builder.tertiaryTitle(value);
break;
case 'T' :
builder.title(value);
break;
case 'U' :
builder.URL(value);
break;
case 'V' :
builder.volume(value);
break;
case 'W' :
builder.databaseProvider(value);
break;
case 'X' :
builder.abstrct(value);
break;
case 'Y' :
tertiaryAuthors.add(value);
break;
case 'Z' :
notes.add(value);
break;
case '?' :
subsidiaryAuthors.add(value);
break;
case '@' :
builder.isbnOrIssn(value);
break;
case '!' :
builder.shortTitle(value);
break;
case '#' :
builder.custom5(value);
break;
case '$' :
builder.custom6(value);
break;
case ']' :
builder.custom7(value);
break;
case '&' :
builder.section(value);
break;
case '(' :
builder.originalPublication(value);
break;
case ')' :
builder.reprintEdition(value);
break;
case '*' :
builder.reviewedItem(value);
break;
case '+' :
builder.authorAddress(value);
break;
case '^' :
builder.caption(value);
break;
case '>' :
builder.linkToPDF(value);
break;
case '<' :
builder.researchNotes(value);
break;
case '[' :
builder.accessDate(value);
break;
case '=' :
builder.lastModifiedDate(value);
break;
case '~' :
builder.nameOfDatabase(value);
break;
default :
// ignore unknown tags
break;
}
}
handleReference(builder, authors, editors, translatedAuthors, keywords, notes, tertiaryAuthors,
subsidiaryAuthors, result);
return result;
}
private void handleReference(EndNoteReferenceBuilder builder, List authors, List editors,
List translatedAuthors, List keywords, List notes, List tertiaryAuthors,
List subsidiaryAuthors, EndNoteLibrary result) {
if (builder != null) {
if (!authors.isEmpty()) {
builder.authors(authors.toArray(new String[0]));
}
if (!editors.isEmpty()) {
builder.editors(editors.toArray(new String[0]));
}
if (!translatedAuthors.isEmpty()) {
builder.translatedAuthors(translatedAuthors.toArray(new String[0]));
}
if (!keywords.isEmpty()) {
builder.keywords(keywords.toArray(new String[0]));
}
if (!notes.isEmpty()) {
builder.notes(notes.toArray(new String[0]));
}
if (!tertiaryAuthors.isEmpty()) {
builder.tertiaryAuthors(tertiaryAuthors.toArray(new String[0]));
}
if (!subsidiaryAuthors.isEmpty()) {
builder.subsidiaryAuthors(subsidiaryAuthors.toArray(new String[0]));
}
result.addReference(builder.build());
}
}
private EndNoteType parseType(String value, int lc) throws IOException {
try {
return EndNoteType.fromString(value);
} catch (IllegalArgumentException e) {
throw new IOException("Unknown type in line " + lc);
}
}
}