All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.gdcc.xoai.xmlio.XmlReader Maven / Gradle / Ivy

Go to download

Basic XML IO routines used for XOAI OAI-PMH implementation. Forked from obsolete Lyncode sources.

There is a newer version: 5.2.2
Show newest version
/**
 * Copyright 2012 Lyncode
 *
 * 

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of the License at * *

http://www.apache.org/licenses/LICENSE-2.0 * *

Unless required by applicable law or agreed to in writing, software distributed under the * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing permissions and * limitations under the License. */ package io.gdcc.xoai.xmlio; import static io.gdcc.xoai.xmlio.matchers.XmlEventMatchers.aStartElement; import static io.gdcc.xoai.xmlio.matchers.XmlEventMatchers.text; import static org.hamcrest.CoreMatchers.anyOf; import static org.hamcrest.CoreMatchers.not; import io.gdcc.xoai.xmlio.exceptions.XmlReaderException; import io.gdcc.xoai.xmlio.matchers.extractor.ExtractFunction; import java.io.ByteArrayOutputStream; import java.io.InputStream; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import javax.xml.namespace.QName; import javax.xml.stream.XMLEventReader; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.events.Attribute; import javax.xml.stream.events.Namespace; import javax.xml.stream.events.StartElement; import javax.xml.stream.events.XMLEvent; import org.codehaus.stax2.XMLInputFactory2; import org.hamcrest.Description; import org.hamcrest.Matcher; import org.hamcrest.TypeSafeMatcher; public class XmlReader implements AutoCloseable { // Using the STaX2 API here, but hiding behind STaX1 // Ignore SonarCloud warning here - it's used the way the library says we must use it. // https://sonarcloud.io/organizations/gdcc/rules?open=java%3AS3252&rule_key=java%3AS3252 @SuppressWarnings("java:S3252") private static final XMLInputFactory XML_INPUT_FACTORY = XMLInputFactory2.newFactory(); private final XMLEventReader xmlEventParser; public XmlReader(final InputStream stream) throws XmlReaderException { try { this.xmlEventParser = XML_INPUT_FACTORY.createXMLEventReader(stream); } catch (XMLStreamException e) { throw new XmlReaderException(e); } } public boolean current(Matcher matcher) throws XmlReaderException { return matcher.matches(peek()); } public void close() throws XmlReaderException { try { xmlEventParser.close(); } catch (XMLStreamException e) { throw new XmlReaderException(e); } } public QName getName() throws XmlReaderException { if (peek().isStartElement()) return peek().asStartElement().getName(); else if (peek().isEndElement()) return peek().asEndElement().getName(); else throw new XmlReaderException("Current event has no name"); } public String getText() throws XmlReaderException { if (current(text())) return peek().asCharacters().getData(); else throw new XmlReaderException("Current element is not text"); } public String getAttributeValue(Matcher nameMatcher) throws XmlReaderException { if (peek().isStartElement()) { Iterator attributes = peek().asStartElement().getAttributes(); while (attributes.hasNext()) { Attribute attribute = attributes.next(); if (nameMatcher.matches(attribute.getName())) return attribute.getValue(); } } return null; } public Map getAttributes(ExtractFunction extractFunction) throws XmlReaderException { HashMap map = new HashMap<>(); if (peek().isStartElement()) { Iterator attributes = peek().asStartElement().getAttributes(); while (attributes.hasNext()) { Attribute attribute = attributes.next(); map.put(extractFunction.apply(attribute.getName()), attribute.getValue()); } } return map; } public boolean hasAttribute(final Matcher matcher) throws XmlReaderException { return hasAttributeMatcher(matcher).matches(peek().asStartElement().getAttributes()); } private TypeSafeMatcher> hasAttributeMatcher( final Matcher matcher) { return new TypeSafeMatcher<>() { @Override public void describeTo(Description description) { description.appendText("has attribute"); } @Override protected boolean matchesSafely(Iterator item) { while (item.hasNext()) if (matcher.matches(item.next())) return true; return false; } }; } @SafeVarargs public final XmlReader next(Matcher... possibleEvents) throws XmlReaderException { try { xmlEventParser.nextEvent(); while (!anyOf(possibleEvents).matches(peek())) xmlEventParser.nextEvent(); return this; } catch (XMLStreamException e) { throw new XmlReaderException(e); } } public T get(IslandParser islandParser) throws XmlReaderException { return islandParser.parse(this); } private XMLEvent peek() throws XmlReaderException { try { return this.xmlEventParser.peek(); } catch (XMLStreamException e) { throw new XmlReaderException(e); } } public XMLEvent nextEvent() throws XmlReaderException { try { return this.xmlEventParser.nextEvent(); } catch (XMLStreamException e) { throw new XmlReaderException(e); } } public boolean hasNext() { return this.xmlEventParser.hasNext(); } public String retrieveCurrentAsString() throws XmlReaderException { ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); try { if (current(not(aStartElement()))) throw new XmlReaderException("Can only retrieve from starting elements"); XmlIoWriter writer = new XmlIoWriter(outputStream); int depth = 0; while (xmlEventParser.peek() != null) { XMLEvent event = xmlEventParser.peek(); if (event.isStartElement()) { depth++; StartElement start = event.asStartElement(); writer.writeStartElement( start.getName().getPrefix(), start.getName().getLocalPart(), start.getName().getNamespaceURI()); Iterator it = start.getNamespaces(); while (it.hasNext()) { Namespace n = it.next(); writer.writeNamespace(n.getPrefix(), n.getNamespaceURI()); } Iterator attrs = start.getAttributes(); while (attrs.hasNext()) { Attribute attr = attrs.next(); writer.writeAttribute( attr.getName().getPrefix(), attr.getName().getNamespaceURI(), attr.getName().getLocalPart(), attr.getValue()); } } else if (event.isEndElement()) { /* This was the original content. It lead to a breaking test in xoai-service-provider/RecordParserTest, where the end element of an element was not closed because the end element was not written due to count already being 0 when reaching the if clause. Leaving this here when we see any troubles arising from this change. count--; if (count == 0) { break; } else writer.writeEndElement(); */ // write the closing tag and decrease the depth writer.writeEndElement(); depth -= 1; // if this was the end element of the outermost XML layer, break the loop here if (depth == 0) { break; } } else if (event.isCharacters()) { writer.writeCharacters(event.asCharacters().getData()); } if (xmlEventParser.hasNext()) xmlEventParser.nextEvent(); else break; } // when there wasn't enough closing elements, we're doomed. if (depth > 0) throw new XmlReaderException("Unterminated structure"); writer.flush(); writer.close(); return outputStream.toString(); } catch (XMLStreamException e) { throw new XmlReaderException(e); } } public interface IslandParser { T parse(XmlReader reader) throws XmlReaderException; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy