All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.stratosphere.core.testing.GenericTestRecords Maven / Gradle / Ivy

The newest version!
/***********************************************************************************************************************
 *
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *
 **********************************************************************************************************************/

package eu.stratosphere.core.testing;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

import org.junit.Assert;

import eu.stratosphere.api.common.io.FileInputFormat;
import eu.stratosphere.api.common.io.FormatUtil;
import eu.stratosphere.api.common.io.GenericInputFormat;
import eu.stratosphere.api.common.typeutils.TypeComparator;
import eu.stratosphere.api.common.typeutils.TypePairComparator;
import eu.stratosphere.api.common.typeutils.TypeSerializer;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.core.testing.io.SequentialOutputFormat;
import eu.stratosphere.nephele.services.memorymanager.MemoryAllocationException;
import eu.stratosphere.nephele.template.AbstractTask;
import eu.stratosphere.pact.runtime.sort.UnilateralSortMerger;
import eu.stratosphere.util.MutableObjectIterator;
import eu.stratosphere.util.StringUtils;

/**
 * Base class for representing the input or output values of a {@link GenericTestPlan}. The class is
 * especially important when setting the expected values in the GenericTestPlan.
*
* There are two ways to specify the values: *
    *
  1. From a file: with {@link #load(Class, Configuration)} and {@link #load(Class, String, Configuration)} the * location, format, and configuration of the data can be specified. The file is lazily loaded and thus can be * comparable large. *
  2. Ad-hoc: Key/value records can be added with {@link #add(Object...)}, {@link #add(Iterable)}, and * {@link #add(GenericTestRecords)}. Please note that the actual amount of records for a test case as the * GenericTestPlan already involves a certain degree of overhead.
    *
    * GenericTestRecords are directly comparable with equals and hashCode based on its content. Please note that in the * case of large file-based GenericTestRecords, the time needed to compute the {@link #hashCode()} or to compare two * instances with {@link #equals(Object)} can become quite long. Currently, the comparison result is order-dependent as * GenericTestRecords are interpreted as a bags.
    * * @param * the record type */ public class GenericTestRecords implements Closeable, Iterable { private final Iterator EMPTY_ITERATOR = new ArrayList().iterator(); private Configuration configuration; private Class> inputFormatClass; private final List records = new ArrayList(); private String path; private final ClosableManager closableManager = new ClosableManager(); private boolean empty; private TypeConfig typeConfig; /** * Initializes GenericTestRecords with the given {@link TypeConfig}. * * @param typeConfig */ public GenericTestRecords(final TypeConfig typeConfig) { this.setTypeConfig(typeConfig); } /** * Initializes GenericTestRecords. */ protected GenericTestRecords() { } /** * Adds several records at once. * * @param records * the records to add * @return this */ public GenericTestRecords add(final GenericTestRecords records) { if (records.isEmpty()) this.setEmpty(); else { for (final T record : records) this.records.add(record); this.setEmpty(false); records.close(); } return this; } /** * Adds several records at once. * * @param records * the records to add * @return this */ public GenericTestRecords add(final Iterable records) { for (final T record : records) this.records.add(record); this.setEmpty(false); this.inputFormatClass = null; return this; } /** * Adds several records at once. * * @param records * the records to add * @return this */ public GenericTestRecords add(@SuppressWarnings("unchecked") final T... records) { for (final T record : records) this.records.add(record); this.setEmpty(false); return this; } /** * Asserts that the contained set of records is equal to the set of records of the given {@link GenericTestRecords}. * * @param expectedValues * the other GenericTestRecords defining the expected result * @throws AssertionError * if the sets differ */ public void assertEquals(final GenericTestRecords expectedValues) throws AssertionError { new GenericTestRecordsAssertor(expectedValues.getTypeConfig(), expectedValues, this).assertEquals(); } @Override public void close() { try { this.closableManager.close(); } catch (final IOException e) { } } /* * (non-Javadoc) * @see java.lang.Object#equals(java.lang.Object) */ @Override public boolean equals(final Object obj) { if (this == obj) return true; if (obj == null) return false; if (this.getClass() != obj.getClass()) return false; @SuppressWarnings("unchecked") final GenericTestRecords other = (GenericTestRecords) obj; try { other.assertEquals(this); } catch (final AssertionError e) { return false; } return true; } /** * Returns the typeConfig. * * @return the typeConfig */ public TypeConfig getTypeConfig() { return this.typeConfig; } /* * (non-Javadoc) * @see java.lang.Object#hashCode() */ @Override public int hashCode() { final int prime = 31; int result = 1; final Iterator iterator = this.iterator(); while (iterator.hasNext()) result = prime * result + iterator.next().hashCode(); return result; } /** * Returns true if any add method has been called at least one. * * @return true if records were specified in an ad-hoc manner */ public boolean isAdhoc() { return !this.records.isEmpty(); } /** * Returns true if either records were added manually or with {@link #load(Class, Configuration)}. * * @return true if either records were added manually or with {@link #load(Class, Configuration)}. */ public boolean isInitialized() { return this.isEmpty() || !this.records.isEmpty() || this.inputFormatClass != null; } @Override public Iterator iterator() { return this.iterator(this.typeConfig); } /** * Retrieves an iterator over all entries sorted according to the provided {@link TypeConfig}. * * @param typeConfig * @return the iterator */ public Iterator iterator(final TypeConfig typeConfig) { if (this.isEmpty() || !this.isInitialized()) return this.EMPTY_ITERATOR; if (typeConfig == null) throw new IllegalArgumentException( "No type configuration given. Please set default config for the TestPlan with TestPlan#setTypeConfig or specify them when accessing the inputs/outputs"); if (this.isAdhoc()) { final TypePairComparator typePairComparator = typeConfig.getTypePairComparator(); Collections.sort(this.records, new Comparator() { @Override public int compare(final T o1, final T o2) { typePairComparator.setReference(o2); return typePairComparator.compareToReference(o1); } }); return this.records.iterator(); } if (this.path != null) { final InputIterator inputFileIterator = this.getInputFileIterator(typeConfig); if (!inputFileIterator.hasNext()) return inputFileIterator; return this.createSortedIterator(inputFileIterator, typeConfig); } try { return this.createSortedIterator( new InputIterator(typeConfig.getTypeSerializer(), FormatUtil.openInput(this.inputFormatClass, this.configuration)), typeConfig); } catch (final IOException e) { Assert.fail("creating input format " + StringUtils.stringifyException(e)); return null; } } /** * Initializes this {@link GenericTestRecords} from the given file. * * @param inputFormatClass * the class of the {@link FileInputFormat} * @param file * the path to the file, can be relative * @param configuration * the configuration for the {@link FileInputFormat}. * @return this */ @SuppressWarnings({ "unchecked", "rawtypes" }) public GenericTestRecords load(final Class inputFormatClass, final String file, final Configuration configuration) { this.path = file; this.inputFormatClass = (Class) inputFormatClass; this.configuration = configuration; this.setEmpty(false); this.records.clear(); return this; } /** * Initializes this {@link GenericTestRecords} from the given generic input. * * @param inputFormatClass * the class of the {@link FileInputFormat} * @param configuration * the configuration for the {@link FileInputFormat}. * @return this */ @SuppressWarnings({ "unchecked", "rawtypes" }) public GenericTestRecords load(final Class inputFormatClass, final Configuration configuration) { this.path = null; this.inputFormatClass = (Class>) inputFormatClass; this.configuration = configuration; this.setEmpty(false); this.records.clear(); return this; } /** * Saves the data to the given path in an internal format. * * @param path * the path to write to, may be relative * @throws IOException * if an I/O error occurred */ public void saveToFile(final String path) throws IOException { final Configuration configuration = new Configuration(); SequentialOutputFormat.configureSequentialFormat(configuration).typeSerializer( this.getTypeConfig().getTypeSerializerFactory()); final SequentialOutputFormat outputFormat = FormatUtil.openOutput(SequentialOutputFormat.class, path, configuration); final Iterator iterator = this.iterator(); while (iterator.hasNext()) outputFormat.writeRecord(iterator.next()); outputFormat.close(); } /** * Specifies that the set of key/value records is empty. This method is primarily used to distinguish between an * empty * uninitialized set and a set deliberately left empty. Further calls to {@link #add(Object...)} will reset the * effect of this method invocation and vice-versa. */ public void setEmpty() { this.setEmpty(true); this.inputFormatClass = null; this.records.clear(); } /** * Sets the typeConfig to the specified value. * * @param typeConfig * the typeConfig to set */ public void setTypeConfig(final TypeConfig typeConfig) { this.typeConfig = typeConfig; } @Override public String toString() { final StringBuilder stringBuilder = new StringBuilder("TestRecords: "); final Iterator iterator = this.iterator(); try { for (int index = 0; index < 25 && iterator.hasNext(); index++) { if (index > 0) stringBuilder.append("; "); this.typeConfig.getTypeStringifier().appendAsString(stringBuilder, iterator.next()); } } catch (final IOException e) { } if (iterator.hasNext()) stringBuilder.append("..."); return stringBuilder.toString(); } /** * Returns an Iterator that iterates through the records as they are provided by the ad hoc expression or the file. * Note that this iterator is not guaranteed to provide a result that is different from {@link #iterator()}. * It simply skips the sorting step, it will not shuffle sorted records. * * @return an Iterator that iterates through the records as they are provided by the ad hoc expression or the file. */ public Iterator unsortedIterator() { if (this.isEmpty() || !this.isInitialized()) return this.EMPTY_ITERATOR; if (this.isAdhoc()) return this.records.iterator(); if (this.inputFormatClass != null) return this.getInputFileIterator(this.typeConfig); return this.EMPTY_ITERATOR; } /** * Returns the records. * * @return the records */ protected List getRecords() { return this.records; } /** * Uses {@link UnilateralSortMerger} to sort the files of the {@link SplitInputIterator}. */ private Iterator createSortedIterator(final Iterator inputFileIterator, final TypeConfig typeConfig) { final int memSize = 10; try { final StringBuilder testName = new StringBuilder(); final StackTraceElement[] stackTrace = new Throwable().getStackTrace(); for (int index = 0; index < stackTrace.length; index++) if (!stackTrace[index].getClassName().startsWith("eu.stratosphere.core.testing.")) { testName.append(stackTrace[index].toString()); break; } // instantiate a sort-merger final AbstractTask parentTask = new AbstractTask() { @Override public void invoke() throws Exception { } @Override public void registerInputOutput() { } @Override public String toString() { return "TestPair Sorter " + testName; } }; final TypeComparator comparator = typeConfig.getTypeComparator(); final TypeSerializer serializer = typeConfig.getTypeSerializerFactory().getSerializer(); final UnilateralSortMerger sortMerger = new UnilateralSortMerger(TestEnvironment.getInstance().getMemoryManager(), TestEnvironment.getInstance().getIoManager(), new TestRecordReader(inputFileIterator), parentTask, serializer, comparator, memSize * 1024L * 1024L, 2, 0.7f); this.closableManager.add(sortMerger); // obtain and return a grouped iterator from the sort-merger return new ImmutableRecordIterator(serializer, sortMerger.getIterator()); } catch (final MemoryAllocationException mae) { throw new RuntimeException( "MemoryManager is not able to provide the required amount of memory for ReduceTask", mae); } catch (final IOException ioe) { throw new RuntimeException("IOException caught when obtaining SortMerger for ReduceTask", ioe); } catch (final InterruptedException iex) { throw new RuntimeException("InterruptedException caught when obtaining iterator over sorted data.", iex); } } @SuppressWarnings({ "unchecked", "rawtypes" }) private InputIterator getInputFileIterator(final TypeConfig typeConfig) { final InputIterator inputFileIterator; try { inputFileIterator = new InputIterator(typeConfig.getTypeSerializer(), FormatUtil.openAllInputs((Class) this.inputFormatClass, this.path, this.configuration)); } catch (final IOException e) { Assert.fail("reading values from " + this.path + ": " + StringUtils.stringifyException(e)); return null; } catch (final Exception e) { Assert.fail("creating input format " + StringUtils.stringifyException(e)); return null; } return inputFileIterator; } private boolean isEmpty() { return this.empty; } // protected Iterator getUnsortedIterator() { // if (this.isEmpty()) // return this.EMPTY_ITERATOR; // if (this.isAdhoc()) // return this.records.iterator(); // if (this.inputFormatClass != null) // return this.getInputFileIterator(); // return this.EMPTY_ITERATOR; // } private void setEmpty(final boolean empty) { this.empty = empty; } private final class TestRecordReader implements MutableObjectIterator { private final Iterator inputFileIterator; private final TypeSerializer typeSerializer = GenericTestRecords.this.typeConfig.getTypeSerializer(); private TestRecordReader(final Iterator inputFileIterator) { this.inputFileIterator = inputFileIterator; } /* * (non-Javadoc) * @see eu.stratosphere.util.MutableObjectIterator#next(java.lang.Object) */ @Override public boolean next(final T target) throws IOException { if (this.inputFileIterator.hasNext()) { this.typeSerializer.copyTo(this.inputFileIterator.next(), target); return true; } return false; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy