io.github.h5jan.io.CSVLoader Maven / Gradle / Ivy
/*-
*
* Copyright 2019 Halliburton Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.github.h5jan.io;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.eclipse.january.DatasetException;
import org.eclipse.january.IMonitor;
import org.eclipse.january.dataset.AbstractDataset;
import org.eclipse.january.dataset.Dataset;
import org.eclipse.january.dataset.DatasetFactory;
import org.eclipse.january.dataset.Maths;
import io.github.h5jan.core.DataFrame;
/**
* Read wells in 2017 SEG ML format
*/
class CsvLoader extends AbstractStreamLoader implements IStreamLoader {
/**
 * Column name constant with the value "DEPTH".
 * NOTE(review): presumably the header of the depth column; its use is not visible here - confirm.
 */
public static final String DEPTH_COL_NAME = "DEPTH";
/**
 * Maximum number of records to read; the default of Integer.MAX_VALUE means no practical cap.
 * NOTE(review): presumably what getLimit() returns for the record loop's truncation check - confirm.
 */
private int limit = Integer.MAX_VALUE;
/**
 * Loads a CSV stream into a {@link DataFrame}.
 * <p>
 * The stream is decoded as UTF-8 and parsed as RFC 4180 CSV with the first
 * record taken as the header. The records are handed to
 * {@code readWellSamples} together with the configuration's filter name, and
 * every entry of the returned map is converted with
 * {@code exactLogSamplesToData}. The frame is named after the configured file
 * name up to (not including) its first {@code '.'}; a file name without any
 * dot is used unchanged.
 *
 * @param stream CSV content; this method does not close it
 * @param configuration supplies the filter name and the file name
 * @param mon monitor passed on to the record reader
 * @return a frame created with {@code AbstractDataset.FLOAT32} from the converted columns
 * @throws IOException if parsing or reading the records fails
 * @throws DatasetException if one of the called dataset or frame operations raises it
 */
@Override
public DataFrame load(InputStream stream, Configuration configuration, IMonitor mon) throws IOException, DatasetException {
    final Iterable<CSVRecord> records = CSVFormat.RFC4180.withFirstRecordAsHeader()
            .parse(new InputStreamReader(stream, StandardCharsets.UTF_8));

    final String name = configuration.getFileName();
    // indexOf yields -1 when the name has no extension; substring(0, -1) would
    // throw, so fall back to the whole name in that case.
    final int dot = name.indexOf('.');
    final String frameName = dot < 0 ? name : name.substring(0, dot);

    // The map returned by readWellSamples keeps column order, so the resulting
    // list of converted columns follows the order of the CSV header.
    return new DataFrame(frameName, AbstractDataset.FLOAT32,
            readWellSamples(configuration.getFilterName(), records, mon)
                    .entrySet()
                    .stream()
                    .map(CsvLoader::exactLogSamplesToData)
                    .collect(Collectors.toList()));
}
private Map> readWellSamples( String logNameFilter, Iterable records,
final IMonitor mon ) throws IOException {
int count = 0;
Map> result = new LinkedHashMap<>(); // Order must be kept
RECORD_LOOP: for (CSVRecord csvRecord : records) {
count++;
if (count>getLimit()) { // Can use limit to truncate data read
break RECORD_LOOP;
}
for (Map.Entry col : csvRecord.toMap().entrySet()) {
final String logName = col.getKey().trim();
if (logNameFilter != null && !Pattern.matches( logNameFilter, logName )) continue;
final String logValue = col.getValue().trim();
if (isCancelled( mon )) {
throw new IOException( "The load job was cancelled" );
}
List