All Downloads are FREE. Search and download functionalities are using the official Maven repository.

uk.nhs.ciao.docs.parser.MultiDocumentParser Maven / Gradle / Ivy

The newest version!
package uk.nhs.ciao.docs.parser;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import uk.nhs.ciao.io.MultiCauseIOException;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.io.ByteStreams;

/**
 * A document parser which attempts to parse the document using multiple
 * delegate parsers.
 * 

* The parsers are attempted in registration order until one parser completes a successful * parse. */ public final class MultiDocumentParser implements DocumentParser { private static final Logger LOGGER = LoggerFactory.getLogger(MultiDocumentParser.class); private final Set parsers; /** * Constructs a new empty multi-document parser. Delegate parsers can be added later * after construction. */ public MultiDocumentParser() { parsers = Sets.newLinkedHashSet(); } /** * Constructs a new multi-document parser which delegates to the specified parsers * when attempting to parse documents. Additional delegate parsers can be added * later after construction. *

* Delegate parsers are attempted in registration order. * * @param parsers The delegate parsers to register */ public MultiDocumentParser(final DocumentParser... parsers) { this(); addParsers(parsers); } /** * Registers the specified parser as a delegate to use when attempting to parse documents. *

* Delegate parsers are attempted in registration order * * @param parser The delegate parser to register */ public void addParser(final DocumentParser parser) { if (parser != null) { parsers.add(parser); } } /** * Registers the specified parsers as delegates to use when attempting to parse documents. *

* Delegate parsers are attempted in registration order * * @param parser The delegate parsers to register */ public void addParsers(final Iterable parsers) { for (final DocumentParser parser: parsers) { addParser(parser); } } /** * Registers the specified parsers as delegates to use when attempting to parse documents. *

* Delegate parsers are attempted in registration order * * @param parser The delegate parsers to register */ public void addParsers(final DocumentParser... parsers) { for (final DocumentParser parser: parsers) { addParser(parser); } } /** * {@inheritDoc} * * @throws UnsupportedDocumentTypeException If no registered parser supports the document type, or if * no parses are registered * @throws MultiCauseIOException When all parsers failed and at least one failed with an IOException */ @Override public Map parseDocument(final InputStream in) throws UnsupportedDocumentTypeException, IOException { final Map properties; if (parsers.isEmpty()) { throw new UnsupportedDocumentTypeException("No parsers are available"); } else if (parsers.size() == 1) { properties = parseDocumentWithSingleParser(in); } else { properties = parseDocumentWithMultipleParsers(in); } return properties; } /** * Delegates the parse to the single registered parser */ private Map parseDocumentWithSingleParser(final InputStream in) throws UnsupportedDocumentTypeException, IOException { final DocumentParser parser = parsers.iterator().next(); return parser.parseDocument(in); } /** * Parses the document when multiple parsers are registered. Each parser is * tried in turn until one succeeds, or all fail. */ private Map parseDocumentWithMultipleParsers(final InputStream in) throws UnsupportedDocumentTypeException, IOException { // cache the input stream (multiple reads may be required) final ByteArrayInputStream cachedInputStream = cacheInputStream(in); final List suppressedExceptions = Lists.newArrayList(); for (final DocumentParser parser: parsers) { try { cachedInputStream.reset(); return parser.parseDocument(cachedInputStream); } catch (final UnsupportedDocumentTypeException e) { LOGGER.trace("Parser {} does not support document type", parser, e); suppressedExceptions.add(e); } catch (final Exception e) { LOGGER.trace("Parser {} failed to parse the document", parser, e); suppressedExceptions.add(e); } } if (onlyContainsUnsupportedDocumentType(suppressedExceptions)) { throw new UnsupportedDocumentTypeException("No parsers support the type of document"); } else { throw new MultiCauseIOException("All parsers failed to parse the document", suppressedExceptions); } } /** * Caches the input stream in memory. *

* This allows the input stream content to be read multiple times. */ private ByteArrayInputStream cacheInputStream(final InputStream in) throws IOException { if (in instanceof ByteArrayInputStream) { return (ByteArrayInputStream)in; } final byte[] bytes = ByteStreams.toByteArray(in); return new ByteArrayInputStream(bytes); } /** * Tests if the specified list only contains UnsupportedDocumentTypeExceptions * * @param suppressedExceptions The list to test * @return true if the list only contains UnsupportedDocumentTypeExceptions, or false otherwise */ private boolean onlyContainsUnsupportedDocumentType(final List suppressedExceptions) { for (final Exception exception: suppressedExceptions) { if (!(exception instanceof UnsupportedDocumentTypeException)) { return false; } } return true; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy