// com.tectonica.xmlchunk.XmlChunkerIterator (Maven / Gradle / Ivy)
/*
* Copyright (C) 2012-2024 Zach Melamed
*
* Latest version available online at https://github.com/zach-m/jonix
* Contact me at [email protected]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.tectonica.xmlchunk;
import org.w3c.dom.Element;
import javax.xml.stream.XMLStreamException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.NoSuchElementException;
// CHECKSTYLE:OFF
/**
* An iterator for XML data extraction, intended for XML source that has the following properties:
* <ul>
* <li>May be infinitely large (can't be held in memory in its entirety)</li>
* <li>Has a repetitive structure, where sub-XML records of interest are all located at some constant
* depth/level</li>
* </ul>
* <p>
* The XML source will be broken into 'chunks', each representing one XML sub-tree positioned at the target depth
* (assuming it is small enough to fit in memory). The chunk will be returned by this iterator's {@link #next()} method
* as an in-memory DOM {@link Element}.
*
* For example, given the following XML:
*
* <pre>{@code
* <?xml version="1.0" encoding="UTF-8"?>
* <Level1>
*   <Level2a>
*     ..
*     <Level3a>
*       ..
*       <Level4>
*         ..
*       </Level4>
*       ..
*     </Level3a>
*
*     <Level3b>
*       ..
*     </Level3b>
*     ..
*   </Level2a>
*
*   <Level2b>
*     ..
*   </Level2b>
* </Level1>
* }</pre>
*
* Requesting a target depth of 2 would yield two chunks, {@code <Level2a>..</Level2a>} (including its entire sub-tree),
* and {@code <Level2b>..</Level2b>}.
*
* @author Zach Melamed
*/
// CHECKSTYLE:ON
public class XmlChunkerIterator implements Iterator<Element> {
    /** Chunk producer that this iterator pulls from; created once in the constructor. */
    private final XmlChunkerContext ctx;

    /**
     * Look-ahead slot: the chunk the next call to {@link #next()} will hand out. A {@code null} value means
     * the iterator is exhausted (this is exactly what {@link #hasNext()} tests).
     */
    private Element nextChunk;

    /**
     * Creates the iterator and eagerly reads the first chunk, so that {@link #hasNext()} can answer
     * immediately.
     *
     * @param is          the stream holding the XML source; handed as-is to {@code XmlChunkerContext}
     * @param encoding    handed as-is to {@code XmlChunkerContext} (presumably the character encoding of
     *                    {@code is} - confirm against that class)
     * @param targetDepth handed as-is to {@code XmlChunkerContext}; the depth at which chunks are extracted
     * @throws XMLStreamException if creating the context or reading the first chunk fails
     */
    public XmlChunkerIterator(InputStream is, String encoding, int targetDepth) throws XMLStreamException {
        ctx = new XmlChunkerContext(is, encoding, targetDepth);
        nextChunk = ctx.nextChunk();
    }

    /**
     * Reports whether a chunk has already been pre-fetched and is waiting to be returned.
     *
     * @return {@code true} if {@link #next()} will return a chunk, {@code false} otherwise
     */
    @Override
    public boolean hasNext() {
        return nextChunk != null;
    }

    /**
     * Returns the pre-fetched chunk and reads the following one from the context.
     *
     * @return the current chunk
     * @throws NoSuchElementException if {@link #hasNext()} is {@code false}
     * @throws RuntimeException       wrapping the {@link XMLStreamException} raised while pre-fetching the
     *                                following chunk ({@link Iterator#next()} cannot declare checked exceptions)
     */
    @Override
    public Element next() {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        final Element current = nextChunk;
        try {
            // Advance the look-ahead before returning, so hasNext() stays a cheap field check.
            nextChunk = ctx.nextChunk();
        } catch (XMLStreamException e) {
            throw new RuntimeException(e);
        }
        return current;
    }

    /**
     * Removal is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
}