eu.stratosphere.core.testing.GenericTestRecords Maven / Gradle / Ivy
The newest version!
/***********************************************************************************************************************
*
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.core.testing;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.junit.Assert;
import eu.stratosphere.api.common.io.FileInputFormat;
import eu.stratosphere.api.common.io.FormatUtil;
import eu.stratosphere.api.common.io.GenericInputFormat;
import eu.stratosphere.api.common.typeutils.TypeComparator;
import eu.stratosphere.api.common.typeutils.TypePairComparator;
import eu.stratosphere.api.common.typeutils.TypeSerializer;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.core.testing.io.SequentialOutputFormat;
import eu.stratosphere.nephele.services.memorymanager.MemoryAllocationException;
import eu.stratosphere.nephele.template.AbstractTask;
import eu.stratosphere.pact.runtime.sort.UnilateralSortMerger;
import eu.stratosphere.util.MutableObjectIterator;
import eu.stratosphere.util.StringUtils;
/**
* Base class for representing the input or output values of a {@link GenericTestPlan}. The class is
* especially important when setting the expected values in the GenericTestPlan.
*
* There are two ways to specify the values:
*
* - From a file: with {@link #load(Class, Configuration)} and {@link #load(Class, String, Configuration)} the
* location, format, and configuration of the data can be specified. The file is lazily loaded and thus can be
* comparatively large.
*
* - Ad-hoc: Key/value records can be added with {@link #add(Object...)}, {@link #add(Iterable)}, and
* {@link #add(GenericTestRecords)}. Please note that the actual amount of records should be kept small for a test
* case, as the GenericTestPlan already involves a certain degree of overhead.
*
* GenericTestRecords are directly comparable with equals and hashCode based on their content. Please note that in the
* case of large file-based GenericTestRecords, the time needed to compute the {@link #hashCode()} or to compare two
* instances with {@link #equals(Object)} can become quite long. Currently, the comparison result is order-dependent as
* GenericTestRecords are interpreted as bags.
*
* @param <T>
* the record type
*/
public class GenericTestRecords implements Closeable, Iterable {
/** Iterator over an empty list; returned by the iterator methods whenever there is nothing to iterate over. */
private final Iterator EMPTY_ITERATOR = new ArrayList().iterator();
/** Configuration passed to the input format when it is opened; assigned by the load methods. */
private Configuration configuration;
// NOTE(review): the generic brackets of the next declaration appear to have been stripped by text extraction
// (presumably Class<? extends GenericInputFormat<T>>) -- confirm against the original source.
/** Input format class assigned by the load methods; reset to null by setEmpty() and add(Iterable). */
private Class extends GenericInputFormat> inputFormatClass;
/** Records added in an ad-hoc manner; sorted in place when a sorted iterator is requested. */
private final List records = new ArrayList();
/** File path assigned by load(Class, String, Configuration); null after load(Class, Configuration). */
private String path;
/** Collects the sort-mergers created for sorted iteration so that close() can release them. */
private final ClosableManager closableManager = new ClosableManager();
/** True once the record set has been marked as deliberately empty via setEmpty(). */
private boolean empty;
/** Default type configuration used by iterator(), toString(), and saveToFile(String). */
private TypeConfig typeConfig;
/**
 * Creates a record set that uses the given {@link TypeConfig} as its default type configuration.
 *
 * @param typeConfig
 *        the type configuration to store as default
 */
public GenericTestRecords(final TypeConfig typeConfig) {
	this.setTypeConfig(typeConfig);
}
/**
 * Creates a record set without a type configuration; it can be supplied later through
 * {@link #setTypeConfig(TypeConfig)}.
 */
protected GenericTestRecords() {
}
/**
 * Copies all records of the given {@link GenericTestRecords} into this instance.
 * <p>
 * If the given records are marked as deliberately empty, this instance is marked as empty as well (which also
 * discards records added before, see {@link #setEmpty()}). Otherwise the given records are iterated through
 * their default iterator, appended to this instance, and the given instance is closed afterwards.
 *
 * @param records
 *        the records to add
 * @return this
 */
public GenericTestRecords add(final GenericTestRecords records) {
	if (records.isEmpty()) {
		this.setEmpty();
		return this;
	}
	try {
		for (final T record : records)
			this.records.add(record);
		this.setEmpty(false);
	} finally {
		// release the resources opened for iteration even if reading the source records failed
		records.close();
	}
	return this;
}
/**
* Adds several records at once.
*
* @param records
* the records to add
* @return this
*/
public GenericTestRecords add(final Iterable extends T> records) {
for (final T record : records)
this.records.add(record);
this.setEmpty(false);
this.inputFormatClass = null;
return this;
}
/**
 * Appends the given values as ad-hoc records and clears the "deliberately empty" mark.
 *
 * @param records
 *        the records to add
 * @return this
 */
public GenericTestRecords add(@SuppressWarnings("unchecked") final T... records) {
	Collections.addAll(this.records, records);
	this.setEmpty(false);
	return this;
}
/**
 * Checks this record set against the expected one by delegating to a {@link GenericTestRecordsAssertor} that is
 * created with the type configuration of the expected values.
 *
 * @param expectedValues
 *        the other GenericTestRecords defining the expected result
 * @throws AssertionError
 *         if the sets differ
 */
public void assertEquals(final GenericTestRecords expectedValues) throws AssertionError {
	final TypeConfig expectedConfig = expectedValues.getTypeConfig();
	final GenericTestRecordsAssertor assertor =
		new GenericTestRecordsAssertor(expectedConfig, expectedValues, this);
	assertor.assertEquals();
}
/**
 * Closes all resources registered with the internal closable manager. An {@link IOException} raised while
 * closing is not propagated to the caller.
 */
@Override
public void close() {
	try {
		this.closableManager.close();
	} catch (final IOException ignored) {
		// this override declares no checked exception; a failure while releasing resources is dropped
	}
}
/*
 * Two instances of exactly the same class are equal if assertEquals(...) accepts them,
 * i.e. if it does not raise an AssertionError.
 * @see java.lang.Object#equals(java.lang.Object)
 */
@Override
public boolean equals(final Object obj) {
	if (this == obj)
		return true;
	if (obj == null || this.getClass() != obj.getClass())
		return false;
	@SuppressWarnings("unchecked")
	final GenericTestRecords that = (GenericTestRecords) obj;
	boolean sameContent = true;
	try {
		// this instance plays the role of the expected values
		that.assertEquals(this);
	} catch (final AssertionError mismatch) {
		sameContent = false;
	}
	return sameContent;
}
/**
 * Provides the default type configuration of this record set.
 *
 * @return the stored typeConfig, as set by the constructor or {@link #setTypeConfig(TypeConfig)}
 */
public TypeConfig getTypeConfig() {
	return this.typeConfig;
}
/*
 * Folds the hash codes of all records, in the order delivered by iterator(), into one value
 * using the usual 31-based polynomial scheme.
 * @see java.lang.Object#hashCode()
 */
@Override
public int hashCode() {
	int hash = 1;
	for (final Object record : this)
		hash = 31 * hash + record.hashCode();
	return hash;
}
/**
 * Tells whether this record set currently holds records that were added through one of the add methods.
 *
 * @return true if the list of ad-hoc records is not empty
 */
public boolean isAdhoc() {
	final boolean noAdhocRecords = this.records.isEmpty();
	return !noAdhocRecords;
}
/**
 * Tells whether the content of this record set has been specified in any way: marked as deliberately empty,
 * filled with ad-hoc records, or bound to an input format through one of the load methods.
 *
 * @return true if the record set is marked empty, holds ad-hoc records, or has an input format class
 */
public boolean isInitialized() {
	if (this.isEmpty())
		return true;
	if (!this.records.isEmpty())
		return true;
	return this.inputFormatClass != null;
}
/**
 * Returns the sorted iterator obtained with the default type configuration of this record set.
 *
 * @return the result of {@link #iterator(TypeConfig)} for the stored typeConfig
 */
@Override
public Iterator iterator() {
	final TypeConfig defaultConfig = this.typeConfig;
	return this.iterator(defaultConfig);
}
/**
 * Retrieves an iterator over all entries, sorted according to the provided {@link TypeConfig}.
 * <p>
 * Empty or uninitialized record sets yield an empty iterator. Ad-hoc records are sorted in place with the pair
 * comparator of the configuration. Records bound to an input format are read through that format and sorted
 * by {@link #createSortedIterator(Iterator, TypeConfig)}; an empty file input is returned as is.
 *
 * @param typeConfig
 *        the type configuration that provides serializer and comparators
 * @return the iterator
 * @throws IllegalArgumentException
 *         if there are records to iterate over but no type configuration was given
 */
public Iterator iterator(final TypeConfig typeConfig) {
	if (this.isEmpty() || !this.isInitialized())
		return this.EMPTY_ITERATOR;
	if (typeConfig == null)
		throw new IllegalArgumentException(
			"No type configuration given. Please set default config for the TestPlan with TestPlan#setTypeConfig or specify them when accessing the inputs/outputs");

	if (this.isAdhoc()) {
		final TypePairComparator pairComparator = typeConfig.getTypePairComparator();
		final Comparator recordOrder = new Comparator() {
			@Override
			public int compare(final T left, final T right) {
				pairComparator.setReference(right);
				return pairComparator.compareToReference(left);
			}
		};
		// sorts the backing list itself, not a copy
		Collections.sort(this.records, recordOrder);
		return this.records.iterator();
	}

	if (this.path == null) {
		// no file path: the input format is opened from the configuration alone
		try {
			final InputIterator genericInput = new InputIterator(typeConfig.getTypeSerializer(),
				FormatUtil.openInput(this.inputFormatClass, this.configuration));
			return this.createSortedIterator(genericInput, typeConfig);
		} catch (final IOException e) {
			Assert.fail("creating input format " + StringUtils.stringifyException(e));
			return null;
		}
	}

	final InputIterator fileInput = this.getInputFileIterator(typeConfig);
	if (fileInput.hasNext())
		return this.createSortedIterator(fileInput, typeConfig);
	// nothing to sort
	return fileInput;
}
/**
* Initializes this {@link GenericTestRecords} from the given file.
*
* @param inputFormatClass
* the class of the {@link FileInputFormat}
* @param file
* the path to the file, can be relative
* @param configuration
* the configuration for the {@link FileInputFormat}.
* @return this
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
public GenericTestRecords load(final Class extends FileInputFormat> inputFormatClass,
final String file, final Configuration configuration) {
this.path = file;
this.inputFormatClass = (Class) inputFormatClass;
this.configuration = configuration;
this.setEmpty(false);
this.records.clear();
return this;
}
/**
* Initializes this {@link GenericTestRecords} from the given generic input.
*
* @param inputFormatClass
* the class of the {@link FileInputFormat}
* @param configuration
* the configuration for the {@link FileInputFormat}.
* @return this
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
public GenericTestRecords load(final Class extends GenericInputFormat> inputFormatClass,
final Configuration configuration) {
this.path = null;
this.inputFormatClass = (Class extends GenericInputFormat>) inputFormatClass;
this.configuration = configuration;
this.setEmpty(false);
this.records.clear();
return this;
}
/**
 * Saves the data to the given path in an internal format. The records are written in the order of
 * {@link #iterator()} through a {@link SequentialOutputFormat} that is configured with the serializer factory of
 * the stored type configuration.
 *
 * @param path
 *        the path to write to, may be relative
 * @throws IOException
 *         if an I/O error occurred
 */
public void saveToFile(final String path) throws IOException {
	final Configuration configuration = new Configuration();
	SequentialOutputFormat.configureSequentialFormat(configuration).typeSerializer(
		this.getTypeConfig().getTypeSerializerFactory());
	final SequentialOutputFormat outputFormat =
		FormatUtil.openOutput(SequentialOutputFormat.class, path, configuration);
	try {
		final Iterator iterator = this.iterator();
		while (iterator.hasNext())
			outputFormat.writeRecord(iterator.next());
	} finally {
		// close the output even if reading or writing a record failed, so the file handle is not leaked
		outputFormat.close();
	}
}
/**
 * Marks this record set as deliberately empty, which distinguishes it from a record set that simply has not
 * been initialized yet. All ad-hoc records and the configured input format are discarded. A later call to
 * {@link #add(Object...)} removes the mark again.
 */
public void setEmpty() {
	this.records.clear();
	this.inputFormatClass = null;
	this.setEmpty(true);
}
/**
 * Replaces the default type configuration of this record set.
 *
 * @param typeConfig
 *        the new default typeConfig
 */
public void setTypeConfig(final TypeConfig typeConfig) {
	this.typeConfig = typeConfig;
}
/**
 * Renders at most the first 25 records (in the order of {@link #iterator()}), separated by "; ", followed by
 * "..." if more records remain.
 * <p>
 * If records are present but no type configuration has been set yet, a placeholder is rendered instead of
 * letting {@link #iterator()} throw, so that the method stays usable in debuggers and failure messages.
 */
@Override
public String toString() {
	final StringBuilder stringBuilder = new StringBuilder("TestRecords: ");
	final boolean hasRecords = !this.isEmpty() && this.isInitialized();
	if (hasRecords && this.typeConfig == null)
		// records can neither be sorted nor stringified without a type configuration
		return stringBuilder.append("<no type config set>").toString();

	final Iterator iterator = this.iterator();
	try {
		for (int index = 0; index < 25 && iterator.hasNext(); index++) {
			if (index > 0)
				stringBuilder.append("; ");
			this.typeConfig.getTypeStringifier().appendAsString(stringBuilder, iterator.next());
		}
	} catch (final IOException ignored) {
		// best effort: whatever has been rendered so far is returned
	}
	if (iterator.hasNext())
		stringBuilder.append("...");
	return stringBuilder.toString();
}
/**
 * Returns an Iterator that iterates through the records as they are provided by the ad hoc expression or the
 * input format. Note that this iterator is not guaranteed to provide a result that is different from
 * {@link #iterator()}. It simply skips the sorting step, it will not shuffle sorted records.
 * <p>
 * File inputs are opened with the stored path; generic inputs (no path) are opened from the configuration alone,
 * in the same way as {@link #iterator(TypeConfig)} does it.
 *
 * @return an Iterator that iterates through the records as they are provided by the ad hoc expression or the
 *         input format.
 */
public Iterator unsortedIterator() {
	if (this.isEmpty() || !this.isInitialized())
		return this.EMPTY_ITERATOR;
	if (this.isAdhoc())
		return this.records.iterator();
	if (this.inputFormatClass == null)
		return this.EMPTY_ITERATOR;
	if (this.path != null)
		return this.getInputFileIterator(this.typeConfig);
	// generic input: there is no path that could be handed to the file-based reader
	try {
		return new InputIterator(this.typeConfig.getTypeSerializer(),
			FormatUtil.openInput(this.inputFormatClass, this.configuration));
	} catch (final IOException e) {
		Assert.fail("creating input format " + StringUtils.stringifyException(e));
		return null;
	}
}
/**
 * Gives subclasses direct access to the list of ad-hoc records.
 *
 * @return the backing list itself, not a copy
 */
protected List getRecords() {
	return this.records;
}
/**
 * Uses a {@link UnilateralSortMerger} to sort the records delivered by the given iterator.
 * <p>
 * The sort-merger is registered with the closable manager, so it is released by {@link #close()}.
 *
 * @param inputFileIterator
 *        the unsorted records
 * @param typeConfig
 *        provides the comparator and the serializer factory used for sorting
 * @return an iterator over the sorted records
 * @throws RuntimeException
 *         if memory allocation fails, an I/O error occurs, or the thread is interrupted while sorting
 */
private Iterator createSortedIterator(final Iterator inputFileIterator, final TypeConfig typeConfig) {
	final int memSize = 10;
	try {
		// label the sorter with the first stack frame outside of the testing package
		final StringBuilder testName = new StringBuilder();
		final StackTraceElement[] stackTrace = new Throwable().getStackTrace();
		for (int index = 0; index < stackTrace.length; index++)
			if (!stackTrace[index].getClassName().startsWith("eu.stratosphere.core.testing.")) {
				testName.append(stackTrace[index].toString());
				break;
			}
		// the sort-merger requires a parent task; this one does nothing but carry a name
		final AbstractTask parentTask = new AbstractTask() {
			@Override
			public void invoke() throws Exception {
			}

			@Override
			public void registerInputOutput() {
			}

			@Override
			public String toString() {
				return "TestPair Sorter " + testName;
			}
		};
		final TypeComparator comparator = typeConfig.getTypeComparator();
		final TypeSerializer serializer = typeConfig.getTypeSerializerFactory().getSerializer();
		// instantiate a sort-merger
		final UnilateralSortMerger sortMerger =
			new UnilateralSortMerger(TestEnvironment.getInstance().getMemoryManager(),
				TestEnvironment.getInstance().getIoManager(), new TestRecordReader(inputFileIterator), parentTask,
				serializer, comparator, memSize * 1024L * 1024L, 2, 0.7f);
		this.closableManager.add(sortMerger);
		// obtain and return a grouped iterator from the sort-merger
		return new ImmutableRecordIterator(serializer, sortMerger.getIterator());
	} catch (final MemoryAllocationException mae) {
		throw new RuntimeException(
			"MemoryManager is not able to provide the required amount of memory for ReduceTask", mae);
	} catch (final IOException ioe) {
		throw new RuntimeException("IOException caught when obtaining SortMerger for ReduceTask", ioe);
	} catch (final InterruptedException iex) {
		// restore the interrupt flag so that callers further up the stack can still observe the interruption
		Thread.currentThread().interrupt();
		throw new RuntimeException("InterruptedException caught when obtaining iterator over sorted data.", iex);
	}
}
/**
 * Opens all inputs found under the stored path with the configured input format and wraps them in an
 * {@link InputIterator}. Any failure is reported through {@link Assert#fail(String)}.
 *
 * @param typeConfig
 *        provides the serializer for the iterator
 * @return the iterator over the records of the file(s)
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private InputIterator getInputFileIterator(final TypeConfig typeConfig) {
	try {
		return new InputIterator(typeConfig.getTypeSerializer(),
			FormatUtil.openAllInputs((Class) this.inputFormatClass, this.path, this.configuration));
	} catch (final IOException readFailure) {
		Assert.fail("reading values from " + this.path + ": " + StringUtils.stringifyException(readFailure));
	} catch (final Exception setupFailure) {
		Assert.fail("creating input format " + StringUtils.stringifyException(setupFailure));
	}
	// only reached if Assert.fail returned normally
	return null;
}
/**
 * Tells whether this record set has been marked as deliberately empty.
 *
 * @return the current value of the empty flag
 */
private boolean isEmpty() {
	return this.empty;
}
// protected Iterator getUnsortedIterator() {
// if (this.isEmpty())
// return this.EMPTY_ITERATOR;
// if (this.isAdhoc())
// return this.records.iterator();
// if (this.inputFormatClass != null)
// return this.getInputFileIterator();
// return this.EMPTY_ITERATOR;
// }
/**
 * Updates the flag that marks this record set as deliberately empty; nothing else is touched.
 *
 * @param empty
 *        the new value of the flag
 */
private void setEmpty(final boolean empty) {
	this.empty = empty;
}
/**
 * Adapts a plain {@link Iterator} to the {@link MutableObjectIterator} interface by copying each record into
 * the target object supplied by the caller.
 */
private final class TestRecordReader implements MutableObjectIterator {
	/** The records to hand out. */
	private final Iterator source;

	// NOTE(review): the serializer is taken from the default type configuration of the enclosing instance,
	// not from the configuration passed to iterator(TypeConfig) -- confirm that this is intended.
	private final TypeSerializer serializer = GenericTestRecords.this.typeConfig.getTypeSerializer();

	private TestRecordReader(final Iterator inputFileIterator) {
		this.source = inputFileIterator;
	}

	/*
	 * Copies the next record into the target; reports false once the source is exhausted.
	 * @see eu.stratosphere.util.MutableObjectIterator#next(java.lang.Object)
	 */
	@Override
	public boolean next(final T target) throws IOException {
		if (!this.source.hasNext())
			return false;
		this.serializer.copyTo(this.source.next(), target);
		return true;
	}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy