// ![JAR search and dependency download from the Maven repository](/logo.png)
// org.marc4j.MarcXmlHandler Maven / Gradle / Ivy
/**
* Copyright (C) 2004 Bas Peters
*
* This file is part of MARC4J
*
* MARC4J is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* MARC4J is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with MARC4J; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
package org.marc4j;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.marc4j.marc.ControlField;
import org.marc4j.marc.DataField;
import org.marc4j.marc.Leader;
import org.marc4j.marc.MarcFactory;
import org.marc4j.marc.Record;
import org.marc4j.marc.Subfield;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
/**
 * Creates {@code Record} objects from SAX events and pushes each item onto the top of the
 * {@code RecordStack}.
 *
 * @author Bas Peters
 */
public class MarcXmlHandler implements ContentHandler {

    /** Constants representing each valid tag type */
    private static final int COLLECTION_ID = 1;

    private static final int LEADER_ID = 2;

    private static final int RECORD_ID = 3;

    private static final int CONTROLFIELD_ID = 4;

    private static final int DATAFIELD_ID = 5;

    private static final int SUBFIELD_ID = 6;

    /** The tag attribute name string */
    private static final String TAG_ATTR = "tag";

    /** The code attribute name string */
    private static final String CODE_ATTR = "code";

    /** The first indicator attribute name string */
    private static final String IND_1_ATTR = "ind1";

    /** The second indicator attribute name string */
    private static final String IND_2_ATTR = "ind2";

    /** The type attribute name string */
    private static final String TYPE_ATTR = "type";

    /** Values of the record "type" attribute that are copied onto the created record */
    private static final Set<String> RECORD_TYPES;

    /** Map of element name strings to their tag type constants */
    private static final Map<String, Integer> ELEMENTS;

    static {
        ELEMENTS = new HashMap<String, Integer>();
        ELEMENTS.put("collection", Integer.valueOf(COLLECTION_ID));
        ELEMENTS.put("leader", Integer.valueOf(LEADER_ID));
        ELEMENTS.put("record", Integer.valueOf(RECORD_ID));
        ELEMENTS.put("controlfield", Integer.valueOf(CONTROLFIELD_ID));
        ELEMENTS.put("datafield", Integer.valueOf(DATAFIELD_ID));
        ELEMENTS.put("subfield", Integer.valueOf(SUBFIELD_ID));

        RECORD_TYPES = new HashSet<String>();
        RECORD_TYPES.add("Bibliographic");
        RECORD_TYPES.add("Authority");
        RECORD_TYPES.add("Holdings");
        RECORD_TYPES.add("Classification");
        RECORD_TYPES.add("Community");
    }

    /** Destination for completed records */
    private final RecordStack queue;

    /** Factory used to create records, leaders, fields and subfields */
    private final MarcFactory factory;

    /** Collects character data for the current leader, control field or subfield */
    private StringBuilder sb;

    private Subfield subfield;

    private ControlField controlField;

    private DataField dataField;

    private Record record;

    private String tag;

    /**
     * Default constructor.
     *
     * @param queue The stack that receives each completed record
     */
    public MarcXmlHandler(final RecordStack queue) {
        this.queue = queue;
        factory = MarcFactory.newInstance();
    }

    /**
     * An event fired at the start of the document.
     */
    @Override
    public void startDocument() throws SAXException {
        // nothing to do
    }

    /**
     * An event fired at the start of an element. Creates the record, field or subfield that
     * corresponds to the element and, for elements that carry text, starts a fresh character buffer.
     *
     * @param uri The namespace URI (not used)
     * @param name The local name; when empty, <code>qName</code> is used instead
     * @param qName The qualified name
     * @param atts The attributes attached to the element
     * @throws MarcException If the element is not a known MARCXML element or a required attribute
     *         is missing
     */
    @Override
    public void startElement(final String uri, final String name, final String qName, final Attributes atts)
            throws SAXException {
        switch (elementTypeOf(name, qName)) {
            case COLLECTION_ID:
                break;
            case RECORD_ID:
                final String typeAttr = atts.getValue(TYPE_ATTR);

                record = factory.newRecord();

                // Only the known record types are copied onto the record; anything else is ignored
                if (typeAttr != null && RECORD_TYPES.contains(typeAttr)) {
                    record.setType(typeAttr);
                }

                break;
            case LEADER_ID:
                sb = new StringBuilder();
                break;
            case CONTROLFIELD_ID:
                tag = atts.getValue(TAG_ATTR);

                if (tag == null) {
                    throw new MarcException("ControlField missing tag value");
                }

                controlField = factory.newControlField(tag);
                sb = new StringBuilder();
                break;
            case DATAFIELD_ID:
                tag = atts.getValue(TAG_ATTR);

                if (tag == null) {
                    throw new MarcException("DataField missing tag value");
                }

                String ind1 = atts.getValue(IND_1_ATTR);
                String ind2 = atts.getValue(IND_2_ATTR);

                if (ind1 == null) {
                    throw new MarcException("DataField (" + tag + ") missing first indicator");
                }

                if (ind2 == null) {
                    throw new MarcException("DataField (" + tag + ") missing second indicator");
                }

                // An empty indicator attribute is treated as a blank indicator
                if (ind1.length() == 0) {
                    ind1 = " ";
                }

                if (ind2.length() == 0) {
                    ind2 = " ";
                }

                dataField = factory.newDataField(tag, ind1.charAt(0), ind2.charAt(0));
                break;
            case SUBFIELD_ID:
                String code = atts.getValue(CODE_ATTR);

                if (code == null) {
                    throw new MarcException("Subfield missing code attribute");
                }

                // An empty code attribute is treated as a blank subfield code
                if (code.length() == 0) {
                    code = " ";
                }

                subfield = factory.newSubfield(code.charAt(0));
                sb = new StringBuilder();
                break;
            default:
                break;
        }
    }

    /**
     * An event fired as characters are consumed. The characters are appended to the current
     * buffer, if one has been started.
     *
     * @param ch The character array holding the data
     * @param start The index of the first character to read
     * @param length The number of characters to read
     */
    @Override
    public void characters(final char[] ch, final int start, final int length) throws SAXException {
        if (sb != null) {
            sb.append(ch, start, length);
        }
    }

    /**
     * An event fired at the end of an element. Stores the buffered text on the leader, control
     * field or subfield, attaches finished fields to the record, and pushes a finished record onto
     * the stack.
     *
     * @param uri The namespace URI (not used)
     * @param name The local name; when empty, <code>qName</code> is used instead
     * @param qName The qualified name
     * @throws MarcException If the element is not a known MARCXML element
     */
    @Override
    public void endElement(final String uri, final String name, final String qName) throws SAXException {
        switch (elementTypeOf(name, qName)) {
            case COLLECTION_ID:
                break;
            case RECORD_ID:
                queue.push(record);
                break;
            case LEADER_ID:
                final Leader leader = factory.newLeader(sb.toString());

                record.setLeader(leader);
                break;
            case CONTROLFIELD_ID:
                controlField.setData(sb.toString());
                record.addVariableField(controlField);
                break;
            case DATAFIELD_ID:
                record.addVariableField(dataField);
                break;
            case SUBFIELD_ID:
                subfield.setData(sb.toString());
                dataField.addSubfield(subfield);
                break;
            default:
                break;
        }
    }

    /**
     * An event fired at the end of the document. Calls <code>end()</code> on the record stack.
     */
    @Override
    public void endDocument() throws SAXException {
        queue.end();
    }

    /**
     * An event fired while consuming ignorable whitespace.
     *
     * @param data The character array holding the whitespace
     * @param offset The index of the first whitespace character
     * @param length The number of whitespace characters
     * @throws SAXException Declared by the interface; not thrown by this implementation
     */
    @Override
    public void ignorableWhitespace(final char[] data, final int offset, final int length) throws SAXException {
        // not implemented
    }

    /**
     * An event fired at the end of prefix mapping.
     *
     * @param prefix The prefix whose mapping ends
     * @throws SAXException Declared by the interface; not thrown by this implementation
     */
    @Override
    public void endPrefixMapping(final String prefix) throws SAXException {
        // not implemented
    }

    /**
     * An event fired while consuming a skipped entity.
     *
     * @param name The name of the skipped entity
     * @throws SAXException Declared by the interface; not thrown by this implementation
     */
    @Override
    public void skippedEntity(final String name) throws SAXException {
        // not implemented
    }

    /**
     * An event fired while consuming a document locator.
     *
     * @param locator The document locator
     */
    @Override
    public void setDocumentLocator(final Locator locator) {
        // not implemented
    }

    /**
     * An event fired while consuming a processing instruction.
     *
     * @param target The processing instruction target
     * @param data The processing instruction data
     * @throws SAXException Declared by the interface; not thrown by this implementation
     */
    @Override
    public void processingInstruction(final String target, final String data) throws SAXException {
        // not implemented
    }

    /**
     * An event fired at the start of prefix mapping.
     *
     * @param prefix The prefix being mapped
     * @param uri The namespace URI the prefix is mapped to
     * @throws SAXException Declared by the interface; not thrown by this implementation
     */
    @Override
    public void startPrefixMapping(final String prefix, final String uri) throws SAXException {
        // not implemented
    }

    /**
     * Resolves an element to its tag type constant. The local name is used unless it is empty, in
     * which case the qualified name is used; any namespace prefix is stripped before the lookup.
     *
     * @param name The local name reported by the parser
     * @param qName The qualified name reported by the parser
     * @return One of the <code>*_ID</code> tag type constants
     * @throws MarcException If the element name is not a known MARCXML element
     */
    private int elementTypeOf(final String name, final String qName) {
        final String realname = name.length() == 0 ? qName : name;
        final Integer elementType = ELEMENTS.get(stripNsPrefix(realname));

        if (elementType == null) {
            throw new MarcException("Unexpected XML element: " + realname);
        }

        return elementType.intValue();
    }

    /**
     * Handle namespace prefixes; also fixes issue with broken SAX emitters that spit out QName instead of local name.
     * None of our MARCXML local names should have colons.
     *
     * @param aName An element name
     * @return The element name without a namespace prefix
     */
    private String stripNsPrefix(final String aName) {
        final int index = aName.indexOf(":");

        // No colon, or a trailing colon with nothing after it: leave the name untouched
        if (index == -1 || index + 1 == aName.length()) {
            return aName;
        } else {
            return aName.substring(index + 1);
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy