com.klarna.hiverunner.data.TsvFileParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hiverunner Show documentation
Show all versions of hiverunner Show documentation
HiveRunner is a unit test framework based on JUnit (4 or 5) that enables test-driven development (TDD) of Hive SQL without requiring any installed dependencies.
package com.klarna.hiverunner.data;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import com.google.common.base.Splitter;
/**
* A {@link FileParser} for parsing data out of a TSV file.
*/
public class TsvFileParser implements FileParser {
/** Delimiter applied by the no-arg constructor: a single tab character. */
private static final String DEFAULT_DELIMITER = "\t";
/** Null value applied by the no-arg constructor: the empty string. */
private static final String DEFAULT_NULL_VALUE = "";
/** Splitter built from the configured delimiter; replaced on each {@link #withDelimiter(String)} call. */
private Splitter splitter;
/** Marker for null; a column value equal to it is replaced with {@code null}. */
private Object nullValue;
/** Charset set via {@link #withCharset(Charset)}; UTF-8 unless overridden. */
private Charset charset;
/**
 * Creates a parser configured with the defaults: a tab delimiter, the empty string as the null
 * value and UTF-8 as the charset.
 */
public TsvFileParser() {
  // Assign the fields directly instead of calling the public, overridable with*() methods:
  // an override in a subclass would otherwise run before that subclass's own state is
  // initialised. For non-subclassed use the resulting state is identical.
  splitter = Splitter.on(DEFAULT_DELIMITER);
  nullValue = DEFAULT_NULL_VALUE;
  charset = StandardCharsets.UTF_8;
}
/**
 * Sets the delimiter this parser splits on. A tab is used unless this method is called.
 *
 * @param delimiter the separator string to split on
 * @return this parser, so that configuration calls can be chained
 */
public TsvFileParser withDelimiter(String delimiter) {
  this.splitter = Splitter.on(delimiter);
  return this;
}
/**
 * Sets the marker that stands for null. Any column value equal to this marker is replaced with
 * {@code null}. The empty string is used unless this method is called.
 *
 * @param nullValue the value to be treated as null
 * @return this parser, so that configuration calls can be chained
 */
public TsvFileParser withNullValue(Object nullValue) {
  this.nullValue = nullValue;
  return this;
}
/**
 * Sets the {@link Charset} for this parser. UTF-8 is used unless this method is called.
 *
 * @param charset the charset to use
 * @return this parser, so that configuration calls can be chained
 */
public TsvFileParser withCharset(Charset charset) {
  this.charset = charset;
  return this;
}
@Override
public List
© 2015 - 2024 Weber Informatics LLC | Privacy Policy