All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.marc4j.MarcXmlWriter Maven / Gradle / Ivy

Go to download

An easy to use Application Programming Interface (API) for working with MARC and MARCXML in Java.

There is a newer version: 2.6.12
Show newest version
/**
 * Copyright (C) 2004 Bas Peters
 *
 * This file is part of MARC4J
 *
 * MARC4J is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * MARC4J is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with MARC4J; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.marc4j;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.marc4j.converter.CharConverter;
import org.marc4j.marc.ControlField;
import org.marc4j.marc.DataField;
import org.marc4j.marc.Leader;
import org.marc4j.marc.MarcFactory;
import org.marc4j.marc.Record;
import org.marc4j.marc.Subfield;
import org.marc4j.util.Normalizer;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/**
 * Class for writing MARC record objects in MARCXML format. This class outputs a
 * SAX event stream to the given {@link java.io.OutputStream}  or
 * {@link javax.xml.transform.Result} object. It can be used in a SAX
 * pipeline to post-process the result. By default this class uses a null
 * transform. It is strongly recommended to use a dedicated XML serializer.
 * 

*

* This class requires a JAXP compliant XML parser and XSLT processor. The * underlying SAX2 parser should be namespace aware. *

*

* The following example reads a file with MARC records and writes MARCXML * records in UTF-8 encoding to the console: *

*

* *

 *
 *      InputStream input = new FileInputStream("input.mrc")
 *      MarcReader reader = new MarcStreamReader(input);
 *
 *      MarcWriter writer = new MarcXmlWriter(System.out, true);
 *      while (reader.hasNext()) {
 *          Record record = reader.next();
 *          writer.write(record);
 *      }
 *      writer.close();
 *
 * 
*

*

* To perform a character conversion like MARC-8 to UCS/Unicode register a * CharConverter: *

*

* *

 * writer.setConverter(new AnselToUnicode());
 * 
*

*

* In addition you can perform Unicode normalization. This is for example not * done by the MARC-8 to UCS/Unicode converter. With Unicode normalization text * is transformed into the canonical composed form. For example "a�bc" * is normalized to "�bc". To perform normalization set Unicode * normalization to true: *

*

* *

 * writer.setUnicodeNormalization(true);
 * 
*

*

* Please note that it's not garanteed to work if you try to convert normalized * Unicode back to MARC-8 encoding using * {@link info.freelibrary.marc4j.converter.impl.UnicodeToAnsel}. *

*

* This class provides very basic formatting options. For more advanced options * create an instance of this class with a * {@link javax.xml.transform.sax.SAXResult} containing a * {@link org.xml.sax.ContentHandler} derived from a dedicated XML * serializer. *

*

*

* The following example uses * org.apache.xml.serialize.XMLSerializer to write MARC records to * XML using MARC-8 to UCS/Unicode conversion and Unicode normalization: *

*

* *

 *
 *      InputStream input = new FileInputStream("input.mrc")
 *      MarcReader reader = new MarcStreamReader(input);
 *
 *      OutputFormat format = new OutputFormat("xml","UTF-8", true);
 *      OutputStream out = new FileOutputStream("output.xml");
 *      XMLSerializer serializer = new XMLSerializer(out, format);
 *      Result result = new SAXResult(serializer.asContentHandler());
 *
 *      MarcXmlWriter writer = new MarcXmlWriter(result);
 *      writer.setConverter(new AnselToUnicode());
 *      while (reader.hasNext()) {
 *          Record record = reader.next();
 *          writer.write(record);
 *      }
 *      writer.close();
 *
 * 
*

*

* You can post-process the result using a Source object pointing * to a stylesheet resource and a Result object to hold the * transformation result tree. The example below converts MARC to MARCXML and * transforms the result tree to MODS using the stylesheet provided by The * Library of Congress: *

*

* *

 *
 *      String stylesheetUrl = "http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3.xsl";
 *      Source stylesheet = new StreamSource(stylesheetUrl);
 *
 *      Result result = new StreamResult(System.out);
 *
 *      InputStream input = new FileInputStream("input.mrc")
 *      MarcReader reader = new MarcStreamReader(input);
 *      MarcXmlWriter writer = new MarcXmlWriter(result, stylesheet);
 *      writer.setConverter(new AnselToUnicode());
 *      while (reader.hasNext()) {
 *          Record record = (Record) reader.next();
 *          writer.write(record);
 *      }
 *      writer.close();
 *
 * 
*

*

* It is also possible to write the result into a DOM Node: *

*

* *

 *
 *      InputStream input = new FileInputStream("input.mrc")
 *      MarcReader reader = new MarcStreamReader(input);
 *      DOMResult result = new DOMResult();
 *      MarcXmlWriter writer = new MarcXmlWriter(result);
 *      writer.setConverter(new AnselToUnicode());
 *      while (reader.hasNext()) {
 *          Record record = (Record) reader.next();
 *          writer.write(record);
 *      }
 *      writer.close();
 *
 *      Document doc = (Document) result.getNode();
 *
 * 
* * @author Bas Peters */ public class MarcXmlWriter implements MarcWriter { protected static final String CONTROL_FIELD = "controlfield"; protected static final String DATA_FIELD = "datafield"; protected static final String SUBFIELD = "subfield"; protected static final String COLLECTION = "collection"; protected static final String RECORD = "record"; protected static final String LEADER = "leader"; private boolean indent = false; private TransformerHandler handler = null; private Writer writer = null; /** * Character encoding. Default is UTF-8. */ private String encoding = "UTF8"; private CharConverter converter = null; private boolean normalize = false; /** * Constructs an instance with the specified output stream. *

* The default character encoding for UTF-8 is used. * * @throws MarcException */ public MarcXmlWriter(final OutputStream out) { this(out, false); } /** * Constructs an instance with the specified output stream and indentation. *

* The default character encoding for UTF-8 is used. * * @throws MarcException */ public MarcXmlWriter(final OutputStream out, final boolean indent) { this(out, "UTF8", indent); } /** * Constructs an instance with the specified output stream and character * encoding. * * @throws MarcException */ public MarcXmlWriter(final OutputStream out, final String encoding) { this(out, encoding, false); } /** * Constructs an instance with the specified output stream, character * encoding and indentation. * * @throws MarcException */ public MarcXmlWriter(final OutputStream out, final String encoding, final boolean indent) { this.encoding = encoding; if (out == null) { throw new NullPointerException("null OutputStream"); } if (this.encoding == null) { throw new NullPointerException("null encoding"); } try { setIndent(indent); writer = new OutputStreamWriter(out, encoding); writer = new BufferedWriter(writer); setHandler(new StreamResult(writer), null); } catch (final UnsupportedEncodingException e) { throw new MarcException(e.getMessage(), e); } writeStartDocument(); } /** * Constructs an instance with the specified result. * * @param result * @throws SAXException */ public MarcXmlWriter(final Result result) { if (result == null) { throw new NullPointerException("null Result"); } setHandler(result, null); writeStartDocument(); } /** * Constructs an instance with the specified stylesheet location and result. * * @param result * @throws SAXException */ public MarcXmlWriter(final Result result, final String stylesheetUrl) { this(result, new StreamSource(stylesheetUrl)); } /** * Constructs an instance with the specified stylesheet source and result. * * @param result * @throws SAXException */ public MarcXmlWriter(final Result result, final Source stylesheet) { if (stylesheet == null) { throw new NullPointerException("null Source"); } if (result == null) { throw new NullPointerException("null Result"); } setHandler(result, stylesheet); writeStartDocument(); } /** * Closes the writer. */ public void close() { writeEndDocument(); try { if (writer != null) { writer.write("\n"); writer.close(); } } catch (final IOException e) { throw new MarcException(e.getMessage(), e); } } /** * Returns the character converter. * * @return CharConverter the character converter */ public CharConverter getConverter() { return converter; } /** * Sets the character converter. * * @param converter the character converter */ public void setConverter(final CharConverter converter) { this.converter = converter; } /** * If set to true this writer will perform Unicode normalization on data * elements using normalization form C (NFC). The default is false. *

* The implementation used is ICU4J 2.6. This version is based on Unicode * 4.0. * * @param normalize true if this writer performs Unicode normalization, * false otherwise */ public void setUnicodeNormalization(final boolean normalize) { this.normalize = normalize; } /** * Returns true if this writer will perform Unicode normalization, false * otherwise. * * @return boolean - true if this writer performs Unicode normalization, * false otherwise. */ public boolean getUnicodeNormalization() { return normalize; } protected void setHandler(final Result result, final Source stylesheet) throws MarcException { try { final TransformerFactory factory = TransformerFactory.newInstance(); if (!factory.getFeature(SAXTransformerFactory.FEATURE)) { throw new UnsupportedOperationException( "SAXTransformerFactory is not supported"); } final SAXTransformerFactory saxFactory = (SAXTransformerFactory) factory; if (stylesheet == null) { handler = saxFactory.newTransformerHandler(); } else { handler = saxFactory.newTransformerHandler(stylesheet); } handler.getTransformer() .setOutputProperty(OutputKeys.METHOD, "xml"); handler.setResult(result); } catch (final Exception e) { throw new MarcException(e.getMessage(), e); } } /** * Writes the root start tag to the result. * * @throws SAXException */ protected void writeStartDocument() { try { final AttributesImpl atts = new AttributesImpl(); handler.startDocument(); handler.startElement(Constants.MARCXML_NS_URI, COLLECTION, COLLECTION, atts); } catch (final SAXException e) { throw new MarcException( "SAX error occured while writing start document", e); } } /** * Writes the root end tag to the result. * * @throws SAXException */ protected void writeEndDocument() { try { if (indent) { handler.ignorableWhitespace("\n".toCharArray(), 0, 1); } handler.endElement(Constants.MARCXML_NS_URI, COLLECTION, COLLECTION); handler.endPrefixMapping(""); handler.endDocument(); } catch (final SAXException e) { throw new MarcException( "SAX error occured while writing end document", e); } } /** * Writes a Record object to the result. * * @param record - the Record object * @throws SAXException */ public void write(final Record record) { try { toXml(record); } catch (final SAXException e) { throw new MarcException("SAX error occured while writing record", e); } } /** * Returns true if indentation is active, false otherwise. * * @return boolean */ public boolean hasIndent() { return indent; } /** * Activates or deactivates indentation. Default value is false. * * @param indent */ public void setIndent(final boolean indent) { this.indent = indent; } protected void toXml(final Record record) throws SAXException { if (!MarcFactory.newInstance().validateRecord(record)) { throw new MarcException("Marc record didn't validate"); } char temp[]; AttributesImpl atts = new AttributesImpl(); if (indent) { handler.ignorableWhitespace("\n ".toCharArray(), 0, 3); } handler.startElement(Constants.MARCXML_NS_URI, RECORD, RECORD, atts); if (indent) { handler.ignorableWhitespace("\n ".toCharArray(), 0, 5); } handler.startElement(Constants.MARCXML_NS_URI, LEADER, LEADER, atts); final Leader leader = record.getLeader(); temp = leader.toString().toCharArray(); handler.characters(temp, 0, temp.length); handler.endElement(Constants.MARCXML_NS_URI, LEADER, LEADER); for (final ControlField field : record.getControlFields()) { atts = new AttributesImpl(); atts.addAttribute("", "tag", "tag", "CDATA", field.getTag()); if (indent) { handler.ignorableWhitespace("\n ".toCharArray(), 0, 5); } handler.startElement(Constants.MARCXML_NS_URI, CONTROL_FIELD, CONTROL_FIELD, atts); temp = getDataElement(field.getData()); handler.characters(temp, 0, temp.length); handler.endElement(Constants.MARCXML_NS_URI, CONTROL_FIELD, CONTROL_FIELD); } for (final DataField field : record.getDataFields()) { atts = new AttributesImpl(); atts.addAttribute("", "tag", "tag", "CDATA", field.getTag()); atts.addAttribute("", "ind1", "ind1", "CDATA", String.valueOf(field .getIndicator1())); atts.addAttribute("", "ind2", "ind2", "CDATA", String.valueOf(field .getIndicator2())); if (indent) { handler.ignorableWhitespace("\n ".toCharArray(), 0, 5); } handler.startElement(Constants.MARCXML_NS_URI, DATA_FIELD, DATA_FIELD, atts); for (final Subfield subfield : field.getSubfields()) { atts = new AttributesImpl(); atts.addAttribute("", "code", "code", "CDATA", String .valueOf(subfield.getCode())); if (indent) { handler.ignorableWhitespace("\n ".toCharArray(), 0, 7); } handler.startElement(Constants.MARCXML_NS_URI, SUBFIELD, SUBFIELD, atts); temp = getDataElement(subfield.getData()); handler.characters(temp, 0, temp.length); handler.endElement(Constants.MARCXML_NS_URI, SUBFIELD, SUBFIELD); } if (indent) { handler.ignorableWhitespace("\n ".toCharArray(), 0, 5); } handler.endElement(Constants.MARCXML_NS_URI, DATA_FIELD, DATA_FIELD); } if (indent) { handler.ignorableWhitespace("\n ".toCharArray(), 0, 3); } handler.endElement(Constants.MARCXML_NS_URI, RECORD, RECORD); } protected char[] getDataElement(final String data) { String dataElement = null; if (converter == null) { dataElement = data; } else { dataElement = converter.convert(data); } if (normalize) { dataElement = Normalizer.normalize(dataElement, Normalizer.NFC); } return dataElement.toCharArray(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy