// com.uber.hoodie.func.LazyIterableIterator (Maven / Gradle / Ivy)
/*
* Copyright (c) 2016 Uber Technologies, Inc. ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.func;
import java.util.Iterator;
/**
 * (NOTE: Adapted from Apache SystemML) Generic base class for lazy, single-pass iterators, meant to
 * simplify the implementation of lazy iterators for mapPartitions use cases. See [SPARK-3369] for
 * the reasons the Iterable API is kept for backwards compatibility despite Spark's single-pass
 * nature.
 *
 * <p>Provides a way to obtain an iterator of type O (output) out of an iterator of type I (input).
 *
 * <p>Things to remember:
 * <ul>
 * <li>Assumes the caller invokes hasNext() to check for elements before calling next() to obtain
 * them; start() and end() are only ever triggered from hasNext().</li>
 * <li>Assumes hasNext() gets called at least once.</li>
 * <li>The concrete implementation is responsible for calling inputItr.next() and doing the
 * processing in computeNext().</li>
 * </ul>
 *
 * @param <I> element type of the wrapped input iterator
 * @param <O> element type produced by this iterator
 */
public abstract class LazyIterableIterator<I, O> implements Iterable<O>, Iterator<O> {

  /** Input iterator that subclasses pull from inside computeNext(). */
  protected Iterator<I> inputItr;
  /** Set once iterator() has handed out this instance. */
  private boolean consumed = false;
  /** Set once start() has been attempted. */
  private boolean startCalled = false;
  /** Set once end() has been attempted. */
  private boolean endCalled = false;

  /**
   * Wraps the given input iterator.
   *
   * @param in iterator supplying the input elements
   */
  public LazyIterableIterator(Iterator<I> in) {
    inputItr = in;
  }

  /**
   * Called once, before any elements are processed.
   */
  protected abstract void start();

  /**
   * Block computation to be overridden by subclasses.
   *
   * @return the next output element
   */
  protected abstract O computeNext();

  /**
   * Called once, after all elements are processed.
   */
  protected abstract void end();

  //////////////////
  // iterable implementation

  /**
   * Invokes start() on the first call only. The flag is set before the call, so a failing start()
   * is not retried on later calls.
   */
  private void invokeStartIfNeeded() {
    if (!startCalled) {
      startCalled = true;
      try {
        start();
      } catch (Exception e) {
        // keep the original failure as the cause so its stack trace is not lost
        throw new RuntimeException("Error in start()", e);
      }
    }
  }

  /**
   * Invokes end() on the first call only. The flag is set before the call, so a failing end() is
   * not retried on later calls.
   */
  private void invokeEndIfNeeded() {
    if (!endCalled) {
      endCalled = true;
      try {
        end();
      } catch (Exception e) {
        // keep the original failure as the cause so its stack trace is not lost
        throw new RuntimeException("Error in end()", e);
      }
    }
  }

  /**
   * Hands out this object as the iterator, exactly once.
   *
   * @return this instance
   * @throws IllegalStateException if iterator() has already been called on this instance
   */
  @Override
  public Iterator<O> iterator() {
    // check for an already consumed iterator
    if (consumed) {
      throw new IllegalStateException("Invalid repeated inputItr consumption.");
    }
    // hand out self exactly once (note: do not hand out the input iterator, since it is consumed
    // by this class' own iterator implementation)
    consumed = true;
    return this;
  }

  //////////////////
  // iterator implementation

  /**
   * Reports whether the input iterator has more elements, triggering start() on the first call
   * and end() the first time the input is found to be exhausted.
   *
   * @return the result of inputItr.hasNext()
   * @throws RuntimeException wrapping any exception thrown by start() or end()
   */
  @Override
  public boolean hasNext() {
    // note: the input is probed before start() is invoked
    boolean ret = inputItr.hasNext();
    // make sure there is exactly one call to start()
    invokeStartIfNeeded();
    if (!ret) {
      // out of elements: make sure end() gets its single call
      invokeEndIfNeeded();
    }
    return ret;
  }

  /**
   * Returns the next output element by delegating to computeNext().
   *
   * @return the value produced by computeNext()
   * @throws RuntimeException wrapping any exception thrown by computeNext()
   */
  @Override
  public O next() {
    try {
      return computeNext();
    } catch (Exception ex) {
      throw new RuntimeException(ex);
    }
  }

  /**
   * Removal is not supported.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public void remove() {
    throw new UnsupportedOperationException("Unsupported remove operation.");
  }
}