All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.tectonica.xmlchunk.XmlChunkerIterator Maven / Gradle / Ivy

/*
 * Copyright (C) 2012-2024 Zach Melamed
 *
 * Latest version available online at https://github.com/zach-m/jonix
 * Contact me at [email protected]
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.tectonica.xmlchunk;

import org.w3c.dom.Element;

import javax.xml.stream.XMLStreamException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.NoSuchElementException;

// CHECKSTYLE:OFF

/**
 * An iterator for XML data extraction, intended for XML source that has the following properties:
 * 

*

  • May be infinitely large (can't be held in memory in its entirety)
  • Has a repetitive structure, where * sub-XML records of interest are all located at some constant depth/level
*

* The XML source will be broken into 'chunks', each representing one XML sub-tree positioned at the target depth * (assuming it is small enough to fit in memory). The chunk will be returned by this iterator's {@link #next()} method * as an in-memory DOM {@link Element}. *

* For example, given the following XML: *

*

 * <?xml version="1.0" encoding="UTF-8"?>
 * <Level1>
 *     <Level2a>
 *         ..
 *         <Level3a>
 *             ..
 *             <Level4>
 *                 ..
 *             </Level4>
 *             ..
 *         </Level3a>
 *
 *         <Level3b>
 *             ..
 *         </Level3b>
 *         ..
 *     </Level2a>
 *
 *     <Level2b>
 *     ..
 *     </Level2b>
 * </Level1> *
 * 
*

* Requesting a target depth of 2 would yield two chunks, {@code ..} (including its entire sub-tree), * and {@code ..}. * * @author Zach Melamed */ // CHECKSTYLE:ON public class XmlChunkerIterator implements Iterator { private final XmlChunkerContext ctx; private Element nextChunk; public XmlChunkerIterator(InputStream is, String encoding, int targetDepth) throws XMLStreamException { ctx = new XmlChunkerContext(is, encoding, targetDepth); nextChunk = ctx.nextChunk(); } @Override public boolean hasNext() { return (nextChunk != null); } @Override public Element next() { if (!hasNext()) { throw new NoSuchElementException(); } Element chunk = nextChunk; try { nextChunk = ctx.nextChunk(); } catch (XMLStreamException e) { throw new RuntimeException(e); } return chunk; } @Override public void remove() { throw new UnsupportedOperationException(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy