
// eu.stratosphere.arraymodel.io.ArrayOutputFormat (Maven / Gradle / Ivy)
// The newest version!
/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.arraymodel.io;
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import eu.stratosphere.api.common.operators.FileDataSink;
import eu.stratosphere.api.common.io.FileOutputFormat;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.types.Value;
/**
 * Base class for file output formats that write arrays of {@link Value}s as delimited text.
 *
 * <p>Each record is written as the {@code toString()} representation of its fields, separated by the
 * configured field delimiter and terminated by the configured record delimiter. The delimiters, the
 * character encoding and the handling of {@code null} fields are read from the {@link Configuration}
 * in {@link #configure(Configuration)}.
 */
public abstract class ArrayOutputFormat extends FileOutputFormat<Value[]> implements ArrayModelOutputFormat {

    private static final long serialVersionUID = 1L;

    /** Configuration key for the string written after every record. */
    private static final String RECORD_DELIMITER_PARAMETER = "arraymodel.output.delimiter";

    /**
     * Configuration key for the string written between two fields of a record.
     *
     * <p>NOTE: the key contains the misspelling "outout". It is deliberately left unchanged, because
     * this string is the key under which the value is stored in and read from the configuration.
     */
    private static final String FIELD_DELIMITER_PARAMETER = "arraymodel.outout.field-delimiter";

    /** Configuration key for the name of the charset used to encode the output. */
    private static final String ENCODING_PARAMETER = "arraymodel.output.encoding";

    /** Configuration key for the flag that controls whether {@code null} fields are tolerated. */
    private static final String LENIENT_PARSING = "arraymodel.output.lenient";

    // --------------------------------------------------------------------------------------------

    /** Writer wrapping the output stream; created in {@link #open(int)}, not part of the serialized state. */
    private transient OutputStreamWriter wrt;

    private String fieldDelimiter;

    private String recordDelimiter;

    /** Name of the charset to encode with, or {@code null} to use the platform default charset. */
    private String charsetName;

    private boolean lenient;

    // --------------------------------------------------------------------------------------------

    /**
     * Reads the record delimiter, field delimiter, encoding and leniency flag from the given parameters.
     *
     * <p>Defaults: newline as record delimiter, {@code "|"} as field delimiter, no explicit encoding,
     * and non-lenient handling of {@code null} fields.
     *
     * @param parameters The configuration to read the settings from.
     * @throws IllegalArgumentException If the resolved record delimiter is {@code null}.
     */
    @Override
    public void configure(Configuration parameters) {
        super.configure(parameters);

        this.recordDelimiter = parameters.getString(RECORD_DELIMITER_PARAMETER, AbstractConfigBuilder.NEWLINE_DELIMITER);
        if (this.recordDelimiter == null) {
            throw new IllegalArgumentException("The delimiter in the ArrayOutputFormat must not be null.");
        }

        this.charsetName = parameters.getString(ENCODING_PARAMETER, null);
        this.fieldDelimiter = parameters.getString(FIELD_DELIMITER_PARAMETER, "|");
        this.lenient = parameters.getBoolean(LENIENT_PARSING, false);
    }

    /**
     * Opens the underlying stream via the superclass and wraps it in a buffered writer that uses the
     * configured charset, or the platform default charset if none was configured.
     *
     * @param taskNumber The number passed on to the superclass' {@code open} method.
     * @throws IOException If the superclass fails to open the stream or the charset is not supported.
     */
    @Override
    public void open(int taskNumber) throws IOException {
        super.open(taskNumber);

        final BufferedOutputStream buffered = new BufferedOutputStream(this.stream, 4096);
        this.wrt = this.charsetName == null
                ? new OutputStreamWriter(buffered)
                : new OutputStreamWriter(buffered, this.charsetName);
    }

    /**
     * Closes the writer (flushing buffered output) and then the superclass.
     *
     * <p>The superclass is closed even when closing the writer fails, so that its resources are not
     * leaked.
     *
     * @throws IOException If closing the writer or the superclass fails.
     */
    @Override
    public void close() throws IOException {
        try {
            if (this.wrt != null) {
                this.wrt.close();
            }
        } finally {
            this.wrt = null;
            super.close();
        }
    }

    // --------------------------------------------------------------------------------------------

    /**
     * Writes one record: the string representation of every field, separated by the field delimiter,
     * followed by the record delimiter.
     *
     * <p>A {@code null} field is written as an empty field in lenient mode; otherwise it causes an
     * exception.
     *
     * @param record The fields of the record to write.
     * @throws IOException If writing to the underlying stream fails.
     * @throws RuntimeException If a field is {@code null} and the format is not lenient.
     */
    @Override
    public void writeRecord(Value[] record) throws IOException {
        for (int i = 0; i < record.length; i++) {
            final Value v = record[i];

            // fail before emitting anything for this field, so no dangling delimiter is written
            if (v == null && !this.lenient) {
                throw new RuntimeException("Cannot serialize record with <null> value at position: " + i);
            }

            if (i != 0) {
                this.wrt.write(this.fieldDelimiter);
            }
            if (v != null) {
                this.wrt.write(v.toString());
            }
        }

        // add the record delimiter
        this.wrt.write(this.recordDelimiter);
    }

    // ============================================================================================

    /**
     * Creates a configuration builder that can be used to set the output format's parameters to the config in a
     * fluent fashion.
     *
     * @param target The data sink whose parameters the builder writes to.
     * @return A config builder for setting parameters.
     */
    public static ConfigBuilder configureArrayFormat(FileDataSink target) {
        return new ConfigBuilder(target.getParameters());
    }

    /**
     * Abstract builder used to set parameters to the output format's configuration in a fluent way.
     *
     * @param <T> The concrete builder type, returned by every setter to allow call chaining.
     */
    protected static abstract class AbstractConfigBuilder<T> extends FileOutputFormat.AbstractConfigBuilder<T>
    {
        private static final String NEWLINE_DELIMITER = "\n";

        // --------------------------------------------------------------------

        /**
         * Creates a new builder for the given configuration.
         *
         * @param config The configuration into which the parameters will be written.
         */
        protected AbstractConfigBuilder(Configuration config) {
            super(config);
        }

        // --------------------------------------------------------------------

        /**
         * Sets the record delimiter to be a single character, namely the given one. The character is
         * documented to be within the value range {@code 0} to {@code 127}; this method does not
         * validate that.
         *
         * @param delimiter The delimiter character.
         * @return The builder itself.
         */
        public T recordDelimiter(char delimiter) {
            this.config.setString(RECORD_DELIMITER_PARAMETER, String.valueOf(delimiter));
            return self();
        }

        /**
         * Sets the record delimiter to be the given string.
         *
         * @param delimiter The delimiter string.
         * @return The builder itself.
         */
        public T recordDelimiter(String delimiter) {
            this.config.setString(RECORD_DELIMITER_PARAMETER, delimiter);
            return self();
        }

        /**
         * Sets the charset that will be used to encode the strings binary.
         * The charset must be available on the processing nodes, otherwise an exception will be raised at
         * runtime.
         *
         * @param charsetName The name of the encoding character set.
         * @return The builder itself.
         */
        public T encoding(String charsetName) {
            this.config.setString(ENCODING_PARAMETER, charsetName);
            return self();
        }

        /**
         * Sets the delimiter that delimits the individual fields in the records textual output representation.
         *
         * @param delimiter The character to be used as a field delimiter.
         * @return The builder itself.
         */
        public T fieldDelimiter(char delimiter) {
            this.config.setString(FIELD_DELIMITER_PARAMETER, String.valueOf(delimiter));
            return self();
        }

        /**
         * Sets the leniency for the serializer. A lenient serializer writes null fields in the record as
         * empty fields, while a non lenient one throws an exception.
         *
         * @param lenient True, if the serializer should be lenient, false otherwise.
         * @return The builder itself.
         */
        public T lenient(boolean lenient) {
            this.config.setBoolean(LENIENT_PARSING, lenient);
            return self();
        }

        /**
         * Returns this builder typed as the concrete builder type.
         *
         * @return The builder itself.
         */
        @SuppressWarnings("unchecked")
        private T self() {
            // unchecked: relies on subclasses passing their own type as T
            return (T) this;
        }
    }

    /**
     * A builder used to set parameters to the output format's configuration in a fluent way.
     */
    public static final class ConfigBuilder extends AbstractConfigBuilder<ConfigBuilder>
    {
        /**
         * Creates a new builder for the given configuration.
         *
         * @param targetConfig The configuration into which the parameters will be written.
         */
        protected ConfigBuilder(Configuration targetConfig) {
            super(targetConfig);
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy