// berlin.yuna.logic.CsvReader (Maven / Gradle / Ivy)
package berlin.yuna.logic;
import berlin.yuna.model.CsvIndexRow;
import berlin.yuna.model.CsvRow;
import berlin.yuna.model.IoCsvException;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static berlin.yuna.logic.FileExtraction.extractFile;
import static berlin.yuna.logic.FileUtils.getFile;
import static berlin.yuna.logic.FileUtils.getResourceFile;
import static berlin.yuna.model.CsvRow.csvRowOf;
import static berlin.yuna.model.CsvRow.validateSeparator;
import static java.nio.charset.StandardCharsets.UTF_8;
/**
 * Fluent, configurable reader that turns the lines of a CSV file into {@link CsvRow}s.
 * <p>
 * Create an instance with {@link #csvReader()}, adjust it through the chainable setters
 * ({@link #separator(char...)}, {@link #autoSep(boolean)}, {@link #unzip(boolean)},
 * {@link #charset(Charset)}, {@link #skipLines(long)}) and read with
 * {@link #consume(Path, Consumer)}, {@link #readAllRows(Path)} or {@link #stream(Path)}.
 * <p>
 * The configuration is held in plain mutable fields without synchronization.
 */
public class CsvReader {

    private char[] separator = new char[]{','};
    private boolean autoSep = false;
    private boolean unzip = false;
    private Charset charset = UTF_8;
    // Values <= 0 mean "skip nothing"; see stream(Path).
    private long skipLines = -1;

    /**
     * Reads the file and hands every non-empty row, together with a zero-based running index,
     * to the given consumer. The underlying stream is closed before this method returns.
     *
     * @param file     the path to the file - or path to a resource
     * @param consumer consumes row
     */
    public void consume(final Path file, final Consumer<? super CsvIndexRow> consumer) {
        try (final Stream<CsvRow> rows = stream(file)) {
            final AtomicLong index = new AtomicLong(0);
            // forEachOrdered guarantees that the running index follows the encounter order of the rows.
            rows.forEachOrdered(csvRow -> consumer.accept(CsvIndexRow.csvIndexRowOf(index.getAndIncrement(), csvRow)));
        }
    }

    /**
     * Reads the whole file into memory. The underlying stream is closed before this method returns.
     *
     * @param file the path to the file - or path to a resource
     * @return all non-empty {@link CsvRow}s of the file as a {@link List}
     */
    public List<CsvRow> readAllRows(final Path file) {
        try (final Stream<CsvRow> rows = stream(file)) {
            return rows.collect(Collectors.toList());
        }
    }

    /**
     * Opens the file lazily as a stream of rows.
     * <p>
     * The returned {@code Stream<CsvRow>} must be closed with a 'try'-with-resources statement.
     * The returned stream encapsulates a {@link Reader}. If timely
     * disposal of file system resources is required, the try-with-resources
     * construct should be used to ensure that the stream's
     * {@link Stream#close close} method is invoked after the stream operations
     * are completed. Closing the stream also triggers the cleanup of the working file
     * when that file differs from the given path.
     * See example: {@link CsvReader#readAllRows(Path)}
     * <p>
     * The first {@link #skipLines()} lines are skipped (nothing is skipped for values &lt;= 0) and
     * rows whose cells are all null or blank are dropped.
     *
     * @param file the path to the file - or path to a resource
     * @return the {@link CsvRow} from the file as a Stream
     * @throws IoCsvException when the file cannot be opened for reading
     */
    public Stream<CsvRow> stream(final Path file) {
        final Path tmpFile = extract(file, unzip);
        try {
            // Resolved inside the try block so that a failing detection still cleans up the working file.
            final char[] sep = autoSep ? new char[]{FileUtils.detectSeparator(tmpFile, charset)} : separator;
            final Stream<String> lines = Files.lines(tmpFile, charset);
            return lines.skip(Math.max(0, skipLines))
                    .map(row -> csvRowOf(row, sep))
                    .filter(csvRow -> !csvRow.stream().allMatch(CsvReader::isNullOrEmpty))
                    .onClose(() -> {
                        try {
                            lines.close();
                        } finally {
                            // Run the cleanup even if closing the line stream fails.
                            deleteTmpFile(file, tmpFile);
                        }
                    });
        } catch (IOException e) {
            deleteTmpFile(file, tmpFile);
            throw new IoCsvException("File read error [" + file + "]", e);
        } catch (RuntimeException e) {
            deleteTmpFile(file, tmpFile);
            throw e;
        }
    }

    /**
     * @return new configurable {@link CsvReader}
     */
    public static CsvReader csvReader() {
        return new CsvReader();
    }

    /**
     * @return copy of the separators used for splitting the csv row
     */
    public char[] separator() {
        // Defensive copy: callers must not be able to modify the configured separators in place.
        return separator == null ? null : separator.clone();
    }

    /**
     * @param separator Splits the CSV rows at the given separator
     *                  Included fallback: [',']
     * @return self [{@link CsvReader}]
     */
    public CsvReader separator(final char... separator) {
        this.separator = validateSeparator(separator);
        return this;
    }

    /**
     * @return On true detects the separator automatically
     */
    public boolean autoSep() {
        return autoSep;
    }

    /**
     * @param autoSep - On true detects the separator automatically
     * @return self [{@link CsvReader}]
     */
    public CsvReader autoSep(final boolean autoSep) {
        this.autoSep = autoSep;
        return this;
    }

    /**
     * @return On true detects and unzips the CSV file automatically
     */
    public boolean unzip() {
        return unzip;
    }

    /**
     * @param unzip - On true detects and unzips the CSV file automatically
     * @return self [{@link CsvReader}]
     */
    public CsvReader unzip(final boolean unzip) {
        this.unzip = unzip;
        return this;
    }

    /**
     * @return The charset to use for decoding the CSV file
     */
    public Charset charset() {
        return charset;
    }

    /**
     * @param charset The charset to use for decoding the CSV file, {@code null} falls back to UTF-8
     * @return self [{@link CsvReader}]
     */
    public CsvReader charset(final Charset charset) {
        this.charset = charset == null ? UTF_8 : charset;
        return this;
    }

    /**
     * @return lines to skip while reading csv
     */
    public long skipLines() {
        return skipLines;
    }

    /**
     * @param skipLines lines to skip while reading csv, values &lt;= 0 skip nothing
     * @return self [{@link CsvReader}]
     */
    public CsvReader skipLines(final long skipLines) {
        this.skipLines = skipLines;
        return this;
    }

    /**
     * @param input cell value to check
     * @return true when the value is {@code null} or contains only whitespace
     */
    private static boolean isNullOrEmpty(final String input) {
        return input == null || input.trim().isEmpty();
    }

    /**
     * Use {@link #csvReader()} to create instances; kept protected for subclasses.
     */
    protected CsvReader() {
    }

    /**
     * Hands the working file to {@link FileUtils#deleteTmpFile} unless it is the file the caller passed in.
     *
     * @param file the path originally given by the caller
     * @param path the path that was actually read
     */
    private void deleteTmpFile(final Path file, final Path path) {
        // equals instead of compareTo: compareTo throws ClassCastException for paths of different providers.
        if (!file.equals(path)) {
            FileUtils.deleteTmpFile(path);
        }
    }

    /**
     * Resolves the given path via {@code getFile}, with {@code getResourceFile} supplied as fallback,
     * and optionally passes the result through {@code extractFile}.
     *
     * @param file    the path to the file - or path to a resource
     * @param extract on true the resolved file is passed through {@code extractFile}
     * @return the path that should be read
     */
    private Path extract(final Path file, final boolean extract) {
        final Path resolved = getFile(file, () -> getResourceFile(file));
        return extract ? extractFile(resolved) : resolved;
    }
}